int main(int argc, char *argv[]) { int n; REAL *y_omp, *y_ompacc, *x; REAL a = 123.456; #pragma omp target device(mpi:all) begin n = VEC_LEN; y_omp = (REAL *) malloc(n * sizeof(REAL)); y_ompacc = (REAL *) malloc(n * sizeof(REAL)); x = (REAL *) malloc(n * sizeof(REAL)); #pragma omp target device(mpi:all) end #pragma omp target device(mpi:master) begin srand48(1<<12); init(x, n); init(y_ompacc, n); memcpy(y_ompacc, y_omp, n*sizeof(REAL)); #pragma omp target device(mpi:master) end int num_threads; // #pragma omp parallel shared (num_threads) { if (omp_get_thread_num() == 0) num_threads = omp_get_num_threads(); } /* CPU threading version*/ double omp_time = read_timer(); axpy_omp(x, y_omp, n, a); omp_time = read_timer() - omp_time; /* openmp acc version */ double ompacc_time = read_timer(); axpy_ompacc(x, y_ompacc, n, a); ompacc_time = read_timer() - ompacc_time; printf("axpy(%d): checksum: %g; time(s):\tOMP(%d threads)\tOMPACC\n", n, check(y_omp, y_ompacc, n),num_threads); printf("\t\t\t\t\t\t%4f\t%4f\n", omp_time, ompacc_time); free(y_omp); free(y_ompacc); free(x); return 0; }
/*
 * Minimal AXPY driver that runs only the accelerator variant.
 *
 * Allocates x and y_ompacc with VEC_LEN REAL elements each, seeds the
 * drand48 generator family via srand48(), fills both vectors through init(),
 * and calls axpy_ompacc(x, y_ompacc, n, a) once.  Nothing is printed.
 *
 * Returns 0 on success, 1 if either vector could not be allocated.
 */
int main(int argc, char *argv[])
{
    int n;
    REAL *y_ompacc, *x;
    REAL a = 123.456;

    n = VEC_LEN;
    y_ompacc = (REAL *) malloc(n * sizeof(REAL));
    x = (REAL *) malloc(n * sizeof(REAL));

    /* Do not hand NULL buffers to init()/axpy_ompacc(). */
    if (!y_ompacc || !x) {
        free(y_ompacc);  /* free(NULL) is a no-op */
        free(x);
        return 1;
    }

    srand48(1<<12);
    init(x, n);
    init(y_ompacc, n);

    /* openmp acc version */
    axpy_ompacc(x, y_ompacc, n, a);

    free(y_ompacc);
    free(x);
    return 0;
}