int main(){ int nthreads = 4; omp_set_num_threads(nthreads); #pragma omp parallel fprintf(stderr,"nthreads %d \n", omp_get_num_threads()); int n3 = 128; int n2 = 128; int n1 = 128; // float ***array = sf_floatalloc3(n1,n2,n3); float *array = fftwf_alloc_real(n3*n2*n1); fftwf_complex* cout = fftwf_alloc_complex(n3*n2*n1); int err = fftwf_init_threads(); if (err == 0) { fprintf(stderr,"something went wrong with fftw\n"); } fprintf(stderr,"Got here\n"); double start,end; start = omp_get_wtime()*omp_get_wtick(); fftwf_plan_with_nthreads(nthreads); fftwf_plan plan = fftwf_plan_dft_r2c_3d( n1,n2,n3, array,cout, FFTW_MEASURE); end = omp_get_wtime()*omp_get_wtick(); fprintf(stderr,"elapsed time: %f %f %f\n",end,start,end-start); for(int i = 0; i < n3*n2*n1; ++i) array[i] = rand()/RAND_MAX; //float start = clock()/CLOCKS_PER_SEC; start = omp_get_wtime(); for(int i=0; i < 1001; ++i) fftwf_execute(plan); //float end = clock()/CLOCKS_PER_SEC; end = omp_get_wtime(); fprintf(stderr,"elapsed time: %f time/calc %f\n", end-start,(end-start)/100.0); fftwf_cleanup_threads(); fftwf_cleanup(); fftwf_destroy_plan(plan); fftwf_free(cout); fftwf_free(array); //free(**array); free(*array); free(array); return 0; }
/*
 * Sanity check for omp_get_wtick().
 *
 * Returns 1 when the reported timer resolution is strictly positive and
 * strictly below 0.01 seconds, and 0 otherwise.
 */
int test_omp_get_wtick()
{
    const double resolution = omp_get_wtick();

    if (resolution > 0.0) {
        return resolution < 0.01;
    }
    return 0;
}
int main(int argc, char* argv[]) { signal(SIGINT, sigint_handler); #if !defined(NDEBUG) std::cout << "\t> Running in DEBUG mode" << std::endl; #endif #if defined(OPENMP_FOUND) omp_set_nested(true); std::cout << "\t> Running using OPENMP " << std::endl; std::cout << "\t\t> " << omp_get_max_threads() << " threads max" << std::endl; std::cout << "\t\t> " << omp_get_wtick()*1e9 << "ns tick" << std::endl; assert( omp_get_nested() ); #endif // test_random(); Rng rng; rng.seed(rand()); Options options = parse_options(argc, argv); typedef std::map<std::string, int> Wins; Wins wins; for (int kk=0; kk<options.number_of_games; kk++) { std::cout << std::endl << std::endl; std::cout << "****************************************" << std::endl; std::cout << "game " << kk << "/" << options.number_of_games << std::endl; const Game& game = play_game(options, rng); const int winner = game.state.get_winner(); if (winner < 0) wins["draw"]++; else { std::string winner_name = "bot"; if (game.hero_infos[winner].is_real_bot()) winner_name = game.hero_infos[winner].name; wins[winner_name]++; } std::cout << std::endl; std::cout << "after " << options.number_of_games << " games" << std::endl; for (Wins::const_iterator wi=wins.begin(), wie=wins.end(); wi!=wie; wi++) { if (wi->first == "draw") { std::cout << " " << wi->second << " draw" << std::endl; continue; } std::cout << " " << wi->second << " victory for " << wi->first << std::endl; } if (sigint_already_caught) break; } return 0; }
/*
 * Benchmark driver: allocates the working arrays and runs the code from
 * "benchfuncs.c" for vector lengths nmin, 2*nmin, ... up to nmax.
 *
 * The lengths are scaled by the OpenMP thread count (1 when built without
 * OpenMP). argc/argv are not used. Always returns 0; the arrays are never
 * released in this function.
 */
int main(int argc, char *argv[]) {
  /* NOTE(review): several of these locals are not referenced below;
     presumably they are used by the textually included benchfuncs.c, so
     they must keep their names - confirm before removing any. */
  QLA_Real sum, *r1;
  QLA_Complex *c1;
  QLA_ColorVector *v1, *v2, *v3, *v4, *v5;
  QLA_ColorVector **vp1, **vp2, **vp3, **vp4;
  QLA_HalfFermion *h1, *h2, **hp1;
  QLA_DiracFermion *d1, *d2, **dp1;
  QLA_ColorMatrix *m1, *m2, *m3, *m4, **mp1;
  double flop, mem, time1;
  int nmin, nmax, c, nthreads=1;
  printf("QLA_Precision = %c\n", QLA_Precision);
#ifdef _OPENMP
  nthreads = omp_get_max_threads();
  printf("OMP THREADS = %i\n", nthreads);
  printf("omp_get_wtick = %g\n", omp_get_wtick());
#ifdef CPU_ZERO
  /* Each OpenMP thread builds a CPU set containing only the CPU whose index
     equals its thread number and applies it as its affinity mask. */
#pragma omp parallel
  {
    int tid = omp_get_thread_num();
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(tid, &set);
    sched_setaffinity(0, sizeof(set), &set);
  }
#endif
#endif
  /* Smallest and largest vector length, proportional to the thread count. */
  nmin = 64*nthreads;
  nmax = 256*1024*nthreads;
  /* Every array is sized for the largest length. v3-v5, vp2-vp4, h1, h2,
     hp1 and m4 are declared above but not allocated here. */
  r1 = myalloc(QLA_Real, nmax);
  c1 = myalloc(QLA_Complex, nmax);
  v1 = myalloc(QLA_ColorVector, nmax);
  v2 = myalloc(QLA_ColorVector, nmax);
  vp1 = myalloc(QLA_ColorVector *, nmax);
  d1 = myalloc(QLA_DiracFermion, nmax);
  d2 = myalloc(QLA_DiracFermion, nmax);
  dp1 = myalloc(QLA_DiracFermion *, nmax);
  m1 = myalloc(QLA_ColorMatrix, nmax);
  m2 = myalloc(QLA_ColorMatrix, nmax);
  m3 = myalloc(QLA_ColorMatrix, nmax);
  mp1 = myalloc(QLA_ColorMatrix *, nmax);
  /* Double the length on every pass. */
  for(int n=nmin; n<=nmax; n*=2) {
    printf("len = %i\n", n);
    printf("len/thread = %i\n", n/nthreads);
    /* Scale factor inversely proportional to the length; presumably consumed
       by the included benchmark code - confirm in benchfuncs.c. */
    double cf = 9.e9/n;
    /* The benchmark body is pasted in here and sees all locals above. */
#include "benchfuncs.c"
  }
  return 0;
}
int main(int argc, char *argv[ ]) { double prec = omp_get_wtick(); //fprintf( stderr, "Clock precision = %g\n", prec ); for (int i = 0; i < NUM; i++) { A[i] = Ranf(-10.f, 10.f); B[i] = Ranf(-10.f, 10.f); } /**************************** * SIMD test block * **************************/ double time0 = Timer(); for (int t = 0; t < NUM_TRIALS; t++) { SimdMul(A, B, C, NUM); } double time1 = Timer(); double dts = (time1 - time0) / (float) NUM_TRIALS; if (PRINT_SIMD == 1) { if(GNUPLOT == 0) { printf("Average SIMD Elapsed time = %g\n", dts); printf("SIMD speed = %8.3f MFLOPS\n", ((float) NUM / dts) / 1000000.f); } else { // x-axis: #-of-elements y-axis: MFLOPS, do not need elapsed time printf("%d %8.3f\n", NUM, ((float) NUM / dts) / 1000000.f); } } /**************************** * non SIMD test block * **************************/ double time2 = Timer(); for (int t = 0; t < NUM_TRIALS; t++) { NonSimdMul(A, B, C, NUM); } double time3 = Timer(); double dtn = (time3 - time2) / (float) NUM_TRIALS; if(PRINT_NOSIMD == 1) { if(GNUPLOT == 0) { printf("Average Non-SIMD Elapsed time = %g\n", dtn); printf("Non-SIMD speed = %8.3f MFLOPS\n", ((float) NUM / dtn) / 1000000.f); //printf("Speed-up = %g\n", dtn / dts); } else { // x-axis: #-of-elements y-axis: MFLOPS, do not need elapsed time printf("%d %8.3f\n", NUM, ((float) NUM / dtn) / 1000000.f); } } if(PRINT_DIFFERENCE == 1) { printf("%d %g\n", NUM, ((float) NUM / dtn) / (dtn/dts)); } return 0; }
int main() { printf("omp_get_num_threads() [default value] = %d\n", omp_get_num_threads()); printf("omp_get_max_threads() = %d \n", omp_get_max_threads()); printf("omp_get_num_procs() = %d\n", omp_get_num_procs()); printf("\n"); omp_set_num_threads(2); // that affectsomp_get_max_threads() #pragma omp parallel for ordered for (int i = 0; i < omp_get_max_threads(); i ++) { printf("Thread %d of total %d thread\n", omp_get_thread_num(), omp_get_num_threads()); } printf("omp_get_num_threads() = %d (Always one in the sequencial part)\n", omp_get_num_threads()); printf("\n"); printf("omp_get_wtime() = %f\n", omp_get_wtime() ); printf("omp_get_wtick() = %f\n", omp_get_wtick() ); return 0; }
/*
 * Solves the M x M linear system stored in the global augmented matrix MA
 * (M rows, M+1 columns, last column = right-hand side) by Gaussian
 * elimination with partial pivoting, parallelised with OpenMP.
 *
 * Steps: read MA from stdin, keep a copy in MA2, eliminate column by column,
 * back-substitute into X, print the elapsed time to stderr and finally print
 * the residual of every equation (MA2 * X - rhs) joined by '+'.
 * Always returns 0. Command-line arguments are not used.
 */
int main(int args, char **argv){
	/* NOTE(review): v and p are declared but never used in this function. */
	int size, MyP, i, j, v, k, d, p, J;
	int * mas;
	long double MAX;
	double wtime1, wtime2, wtick;
	/* Original note (translated from Russian): each branch generates its own
	 * strip of matrix A and its own segment of the right-hand-side vector,
	 * which is appended to A as an additional column. Branch zero generates
	 * strip zero, branch one generates strip one, and so on. (The diagonal of
	 * the original matrix holds 2, every other entry is 1.)
	 * NOTE(review): the code below reads the matrix from stdin instead of
	 * generating it, so this note looks stale.
	 */
	/* Seed rand() with the number of timer ticks that elapsed between two
	 * consecutive omp_get_wtime() calls. */
	wtime1 = omp_get_wtime();
	wtick = omp_get_wtick();
	wtime2 = omp_get_wtime();
	srand((int)((wtime2-wtime1)/wtick));
	/* Read M rows of M+1 long doubles; the fscanf result is not checked. */
	for (i = 0; i < M; i++){
		for (j = 0; j < M+1; j++){
			fscanf(stdin, "%Lf", &MA[i][j]);
		}
	}
	printMatrix();
	/* Keep the untouched system for the residual check at the end. */
	memcpy(MA2, MA, sizeof(long double)*M*(M+1));
	/* OTV is filled with the identity permutation; it is not read again in
	 * this function. */
	for (i = 0; i < M; i++ ) OTV[i] = i;
	wtime1 = omp_get_wtime();
	/* Forward elimination: one parallel region per pivot column i. */
	for (i = 0; i < M; i++){
		#pragma omp parallel shared(mas, size, i, MA, MAD) private(j, MyP, MAX)
		{
			MyP = omp_get_thread_num();
			/* One thread allocates a slot per team member; the implicit
			 * barrier after 'single' makes mas visible before it is used.
			 * The unchecked, uncast malloc relies on C semantics. */
			#pragma omp single
			{
				size = omp_get_num_threads();
				mas = malloc(sizeof(int)*size);
			}
			/* Each thread starts with the diagonal element as its pivot
			 * candidate and records the best row from its share of rows. */
			MAX = fabsl(MA[i][i]);
			mas[MyP] = i;
			#pragma omp for
			for (j = i+1; j < M; j++){
				if (fabsl(MA[j][i]) > MAX){
					MAX = fabsl(MA[j][i]);
					mas[MyP] = j;
				}
			}
			/* One thread reduces the per-thread candidates to the pivot row
			 * J, swaps rows i and J from column i onwards through the
			 * scratch buffer V, and releases mas. */
			#pragma omp single
			{
				J = i;
				MAX = fabsl(MA[J][i]);
				for (j = 0; j < size; j++){
					if (fabsl(MA[mas[j]][i]) > MAX){
						J = mas[j];
						MAX = fabsl(MA[J][i]);
					}
				}
				if (J != i){
					memcpy(V, &MA[i][i], sizeof(long double)*(M+1-i));
					memcpy(&MA[i][i], &MA[J][i], sizeof(long double)*(M+1-i));
					memcpy(&MA[J][i], V, sizeof(long double)*(M+1-i));
				}
				free(mas);
				printMatrix();
			}
			/* Normalise the pivot row to the right of the diagonal. A zero
			 * pivot is only reported (once per column j); elimination
			 * continues regardless. */
			#pragma omp for
			for (j = M; j > i; j--){
				if (MA[i][i] != 0){
					//printf("%d: MA[%d][%d] = %.2f\n", MyP, i, j, MA[i][j]);
					MA[i][j] /= MA[i][i];
					//printf("%d: MA[%d][%d] = %.2f\n", MyP, i, j, MA[i][j]);
				}else printf("ERROR DIV BY ZERO %d: MA[%d][%d] = %.2Lf\n", MyP, i, j, MA[i][j]);
			}
			/* NOTE(review): 'master' has no implied barrier, so the
			 * printMatrix() in the following 'single' may run on another
			 * thread while this store is in flight - confirm whether that
			 * matters. */
			#pragma omp master
			MA[i][i] = 1;
			#pragma omp single
			{
				printMatrix();
			}
			/* Eliminate column i from every row below the pivot. d runs
			 * downwards so MA[k][i] is still intact until the last step. */
			#pragma omp for private(d)
			for (k = i+1; k < M; k++){
				for (d = M; d >= i; d--){
					//printf("%d: %d %d\n", MyP, k, d);
					//printf("%d: MA[%d][%d] = %.2f -= MA[%d][%d] = %.2f * MA[%d][%d] = %.2f\n", MyP, k, d, MA[k][d], k, i, MA[k][i], i, d, MA[i][d]);
					MA[k][d] -= MA[k][i]*MA[i][d];
				}
			}
			#pragma omp single
			{
				printMatrix();
			}
		}
	}
	/* NOTE(review): this 'omp for' sits outside any parallel region, so it
	 * appears to execute on a single thread - confirm that is intended. */
	#pragma omp for
	for ( i = 0; i < M; i++ ) X[i] = MA[i][M];
	/* Back substitution using the unit-diagonal upper triangle. */
	for ( i = M - 2; i >= 0; i-- )
		for ( j = i + 1; j < M; j++ )
			X[i] -= X[j] * MA[i][j];
	wtime2 = omp_get_wtime();
	/* Elapsed-time report (the message text is Russian). */
	fprintf(stderr, "Время работы программы %.9f\n", wtime2-wtime1);
	/* Residual of each original equation, printed as a '+'-joined list. */
	for (i = 0; i < M; i++){
		MAD = 0;
		for (j = 0; j < M; j ++){
			MAD += MA2[i][j]*X[j];
		}
		MAD -= MA2[i][M];
		if (i < M-1) printf("%.12Lf+", MAD);
		else printf("%.12Lf\n", MAD);
	}
	//printf("\n");
	return 0;
}
/* Aborts the program unless the given condition is non-zero.  */
static void
expect_true (int ok)
{
  if (!ok)
    abort ();
}

/*
 * Exercises the OpenMP runtime library routines: simple and nestable locks,
 * the dynamic/nested switches, thread-count queries inside and outside
 * parallel regions, and the wall-clock timer.  Any unexpected result aborts;
 * otherwise 0 is returned.
 */
int
main (void)
{
  omp_lock_t simple_lock;
  omp_nest_lock_t nest_lock;
  double t_begin, t_end, resolution;
  int failed;

  t_begin = omp_get_wtime ();

  /* Simple lock: a held lock cannot be tested successfully, a free one can.  */
  omp_init_lock (&simple_lock);
  omp_set_lock (&simple_lock);
  expect_true (!omp_test_lock (&simple_lock));
  omp_unset_lock (&simple_lock);
  expect_true (omp_test_lock (&simple_lock));
  expect_true (!omp_test_lock (&simple_lock));
  omp_unset_lock (&simple_lock);
  omp_destroy_lock (&simple_lock);

  /* Nestable lock: omp_test_nest_lock reports the new nesting depth.  */
  omp_init_nest_lock (&nest_lock);
  expect_true (omp_test_nest_lock (&nest_lock) == 1);
  omp_set_nest_lock (&nest_lock);
  expect_true (omp_test_nest_lock (&nest_lock) == 3);
  omp_unset_nest_lock (&nest_lock);
  omp_unset_nest_lock (&nest_lock);
  expect_true (omp_test_nest_lock (&nest_lock) == 2);
  omp_unset_nest_lock (&nest_lock);
  omp_unset_nest_lock (&nest_lock);
  omp_destroy_nest_lock (&nest_lock);

  /* Dynamic adjustment and nested parallelism switches.  */
  omp_set_dynamic (1);
  expect_true (omp_get_dynamic ());
  omp_set_dynamic (0);
  expect_true (!omp_get_dynamic ());
  omp_set_nested (1);
  expect_true (omp_get_nested ());
  omp_set_nested (0);
  expect_true (!omp_get_nested ());

  /* Thread-count queries as seen from the sequential part.  */
  omp_set_num_threads (5);
  expect_true (omp_get_num_threads () == 1);
  expect_true (omp_get_max_threads () == 5);
  expect_true (omp_get_thread_num () == 0);
  omp_set_num_threads (3);
  expect_true (omp_get_num_threads () == 1);
  expect_true (omp_get_max_threads () == 3);
  expect_true (omp_get_thread_num () == 0);

  /* The same queries from inside a team of three threads.  */
  failed = 0;
#pragma omp parallel reduction (|:failed)
  {
    failed = omp_get_num_threads () != 3;
    failed |= omp_get_thread_num () < 0;
    failed |= omp_get_thread_num () >= 3;
#pragma omp master
    failed |= omp_get_thread_num () != 0;
  }
  expect_true (!failed);

  expect_true (omp_get_num_procs () > 0);
  expect_true (!omp_in_parallel ());
#pragma omp parallel reduction (|:failed)
  failed = ! omp_in_parallel ();
#pragma omp parallel reduction (|:failed) if (1)
  failed = ! omp_in_parallel ();
  expect_true (!failed);

  /* The wall clock must not run backwards.  */
  t_end = omp_get_wtime ();
  expect_true (!(t_begin > t_end));

  /* Negative precision is definitely wrong,
     bigger than 1s clock resolution is also strange.  */
  resolution = omp_get_wtick ();
  expect_true (!(resolution <= 0 || resolution > 1));
  return 0;
}
/* Trailing-underscore alias that forwards to omp_get_wtick () and returns
   its result unchanged.  */
double
omp_get_wtick_ (void)
{
  const double tick = omp_get_wtick ();
  return tick;
}
/* Returns the timer resolution exactly as reported by omp_get_wtick ().  */
double
timer_getres (void)
{
  const double resolution = omp_get_wtick ();
  return resolution;
}
int main(int argc, char *argv[]) { int i; double timeS1, timeS2, timeP1, timeP2, wtick; wtick = omp_get_wtick(); int n = pow(2, atoi(argv[1])); //printf("n=%d; size=%f MB\n", n, (float)((n*sizeof(float))/1000000.0f)); myRNG rng; /** SEQUENTIAL SORT *************************************************/ // initialize random array float *a = (float *)malloc(sizeof(float)*n); int *index = (int *)malloc(sizeof(int)*n); int *rank; rng.resetSeed(10215); for(i=0; i<n; i++) { *(a + i) = rng.next(); *(index + i) = i; } // if option is selected, print starting array if(atoi(argv[2]) == 1) { printf("[%f, ", *(a + 0)); for(i=1; i<(n-1); i++) { printf("%f, ", *(a + i)); } printf("%f]\n", *(a + n - 1)); } // sort array (sequentially) timeS1 = omp_get_wtime(); seqShellSort(a, index, n); timeS2 = omp_get_wtime(); // check array i=0; while(*(a + i) <= *(a + i + 1) && i<n) { i++; } if(i==(n-1)) { //printf("Seq :: Array sorted SUCCESSFULLY in %f seconds.\n", (timeS2-timeS1)); } else { printf("Seq :: Array is NOT sorted. 
See index %d.\n", i); } // if option is selected, print ending array if(atoi(argv[3]) == 1) { printf("[%f, ", *(a + 0)); for(i=1; i<(n-1); i++) { printf("%f, ", *(a + i)); } printf("%f]\n", *(a + n - 1)); } /** SAMPLE SORT ******************************************************/ // reinitialize random array free(a); free(index); a = (float *)malloc(sizeof(float)*n); index = (int *)malloc(sizeof(int)*n); rng.resetSeed(10215); for(i=0; i<n; i++) { *(a + i) = rng.next(); *(index + i) = i; } // if option is selected, print starting array if(atoi(argv[2]) == 1) { printf("[%f, ", *(a + 0)); for(i=1; i<(n-1); i++) { printf("%f, ", *(a + i)); } printf("%f]\n", *(a + n - 1)); printf("[%f, ", (float)*(index + 0)); for(i=1; i<(n-1); i++) { printf("%f, ", (float)*(index + i)); } printf("%f]\n", (float)*(index + n - 1)); } // sort array (in parallel using Merge) timeP1 = omp_get_wtime(); rank = sampleSort(a, index, n); timeP2 = omp_get_wtime(); // check array i=0; while(*(a + i) <= *(a + i + 1) && i<n) { i++; } if(i==(n-1)) { //printf("Par (Sample) :: Array sorted SUCCESSFULLY in %f seconds.\n", (timeP2-timeP1)); } else { printf("Par (Sample) :: Array is NOT sorted. See index %d.\n", i); } // if option is selected, print ending array if(atoi(argv[3]) == 1) { printf("[%f, ", *(a + 0)); for(i=1; i<(n-1); i++) { printf("%f, ", *(a + i)); } printf("%f]\n", *(a + n - 1)); printf("[%f, ", (float)*(index + 0)); for(i=1; i<(n-1); i++) { printf("%f, ", (float)*(index + i)); } printf("%f]\n", (float)*(index + n - 1)); printf("[%d, ", *(rank + 0)); for(i=1; i<(n-1); i++) { printf("%d, ", *(rank + i)); } printf("%d]\n", *(rank + n - 1)); } printf("%d, %d, %f, %f, %f\n", omp_get_num_threads(), n, (float)((n*sizeof(float))/1000000.0f), (timeS2-timeS1), (timeP2-timeP1)); return 0; }
// Returns omp_get_wtick() converted to long.
// NOTE(review): omp_get_wtick() reports seconds as a double, so the implicit
// conversion drops the fractional part; a sub-second resolution comes back
// as 0. Confirm callers expect that.
long get_wtick()
{
    const double tick_seconds = omp_get_wtick();
    return tick_seconds;
}
/*
 * Benchmark driver: allocates the working arrays and runs the code from
 * "benchfuncs.c" for vector lengths nmin, 2*nmin, ... up to nmax.
 *
 * Optional arguments:
 *   argv[1]  smallest per-thread length (default 64)
 *   argv[2]  largest per-thread length  (default 256*1024)
 *   argv[3]  multiplier applied to the base scale factor 1e9
 *
 * Both lengths are multiplied by the OpenMP thread count (1 when built
 * without OpenMP). Returns 0 on success and 1 when a length is not positive.
 */
int main(int argc, char *argv[]) {
  /* The locals below keep their names because benchfuncs.c is textually
     included further down and presumably refers to them. */
  QLA_Real sum, *r1;
  QLA_Complex *c1;
  QLA_ColorVector *v1, *v2, *v3, *v4, *v5;
  QLA_ColorVector **vp1, **vp2, **vp3, **vp4;
  QLA_HalfFermion *h1, *h2, **hp1;
  QLA_DiracFermion *d1, *d2, **dp1;
  QLA_ColorMatrix *m1, *m2, *m3, *m4, **mp1;
  double cf0, flop, mem, time1;
  int nmin, nmax, c, nthreads=1;

  nmin = 64;
  if(argc>1) nmin = atoi(argv[1]);
  nmax = 256*1024;
  if(argc>2) nmax = atoi(argv[2]);
  cf0 = 1e9;
  if(argc>3) cf0 *= atof(argv[3]);

  /* A non-positive nmin would never grow under n*=2 (endless loop) and would
     divide by zero when computing cf; a non-positive nmax would yield
     invalid allocation sizes. atoi() returns 0 for non-numeric input. */
  if(nmin<=0 || nmax<=0) {
    fprintf(stderr, "nmin and nmax must be positive integers\n");
    return 1;
  }

  printf("QLA version %s (%i)\n", QLA_version_str(), QLA_version_int());
  printf("QLA_Precision = %c\n", QLA_Precision);
  printf("QLA_Nc = %i\n", QLA_Nc);

#ifdef _OPENMP
  nthreads = omp_get_max_threads();
  printf("OMP threads = %i\n", nthreads);
  printf("omp_get_wtick = %g\n", omp_get_wtick());
#ifdef CPU_ZERO
  /* Each OpenMP thread restricts its affinity mask to the CPU whose index
     equals its thread number. */
#pragma omp parallel
  {
    int tid = omp_get_thread_num();
    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(tid, &set);
    sched_setaffinity(0, sizeof(set), &set);
  }
#endif
#endif

  /* Total lengths scale with the thread count. */
  nmin *= nthreads;
  nmax *= nthreads;

  /* Every array is sized for the largest length. */
  r1 = myalloc(QLA_Real, nmax);
  c1 = myalloc(QLA_Complex, nmax);
  v1 = myalloc(QLA_ColorVector, nmax);
  v2 = myalloc(QLA_ColorVector, nmax);
  v3 = myalloc(QLA_ColorVector, nmax);
  v4 = myalloc(QLA_ColorVector, nmax);
  v5 = myalloc(QLA_ColorVector, nmax);
  vp1 = myalloc(QLA_ColorVector *, nmax);
  vp2 = myalloc(QLA_ColorVector *, nmax);
  vp3 = myalloc(QLA_ColorVector *, nmax);
  vp4 = myalloc(QLA_ColorVector *, nmax);
  h1 = myalloc(QLA_HalfFermion, nmax);
  h2 = myalloc(QLA_HalfFermion, nmax);
  hp1 = myalloc(QLA_HalfFermion *, nmax);
  d1 = myalloc(QLA_DiracFermion, nmax);
  d2 = myalloc(QLA_DiracFermion, nmax);
  dp1 = myalloc(QLA_DiracFermion *, nmax);
  m1 = myalloc(QLA_ColorMatrix, nmax);
  m2 = myalloc(QLA_ColorMatrix, nmax);
  m3 = myalloc(QLA_ColorMatrix, nmax);
  m4 = myalloc(QLA_ColorMatrix, nmax);
  mp1 = myalloc(QLA_ColorMatrix *, nmax);

  //QLA_ColorVector *va[4] = { v2, v3, v4, v5 };
  QLA_ColorVector **vpa[4] = { vp1, vp2, vp3, vp4 };
  QLA_ColorMatrix *ma[4] = { m1, m2, m3, m4 };

  /* Double the length on every pass. */
  for(int n=nmin; n<=nmax; n*=2) {
    printf("len = %i\n", n);
    printf("len/thread = %i\n", n/nthreads);
    /* Scale factor proportional to thread count and inversely proportional
       to the length; presumably consumed by the included code. */
    double cf = cf0*nthreads/n;
    /* The benchmark body is pasted in here and sees all locals above. */
#include "benchfuncs.c"
  }

  return 0;
}
/*
 * Benchmarks a dense square matrix multiplication (sol = mat1 * mat2) with
 * OpenMP for every size from size_initial to size_final in steps of
 * size_increment, printing thread count, elapsed seconds and Mflops.
 *
 * Arguments: argv[1] initial size, argv[2] final size, argv[3] increment.
 * Returns -1 on bad arguments, EXIT_FAILURE if an allocation fails and
 * EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    int iN, fN, incN;           /* int, so large sizes are not truncated */
    int nprocs = 1, mat_size;
    double start, finish, elapsed, mflops;
    double *mat1, *mat2, *sol;

    if(argc<4)
    {
        printf("\n\nUSAGE: %s size_initial size_final size_increment\n\n",argv[0]);
        return -1;
    }

    iN=atoi(argv[1]);
    fN=atoi(argv[2]);
    incN=atoi(argv[3]);

    /* A non-positive increment would never terminate the size loop, and a
       non-positive size is meaningless. */
    if(iN<=0 || incN<=0)
    {
        printf("\n\nUSAGE: %s size_initial size_final size_increment\n\n",argv[0]);
        return -1;
    }

    for(mat_size=iN; mat_size<=fN; mat_size+=incN)
    {
        const size_t dim = (size_t)mat_size;
        const size_t mat_size2 = dim * dim;
        size_t cell;
        int i;

        /* Allocating memory to the three matrix. */
        mat1 = (double *) malloc(sizeof(double)*mat_size2);
        mat2 = (double *) malloc(sizeof(double)*mat_size2);
        sol = (double *) malloc(sizeof(double)*mat_size2);

        if(mat1==NULL || mat2==NULL || sol==NULL)
        {
            /* Continuing would dereference a null pointer. */
            printf("\nError in Matrix allocation. Ask Morpheus.\n");
            free(mat1);
            free(mat2);
            free(sol);
            return EXIT_FAILURE;
        }

        /* Random values between 0 and 1 for mat1 and mat2, zeroes for sol.
           Kept serial: rand() is not safe to call from several threads. */
        for (cell = 0; cell < mat_size2; cell++)
        {
            mat1[cell] = (double)rand()/RAND_MAX;
            mat2[cell] = (double)rand()/RAND_MAX;
            sol[cell] = 0;
        }

        /* Ask a real parallel region how many threads a team gets. */
        #pragma omp parallel
        {
            #pragma omp master
            nprocs = omp_get_num_threads();
        }

        /* Clock START. Kept in double: float cannot hold the timestamp with
           enough precision. */
        start = omp_get_wtime();

        /* Rows are distributed over the threads. Every per-row variable is
           declared inside the loop body, so each thread has its own copy. */
        #pragma omp parallel for schedule(runtime)
        for (i = 0; i < mat_size; i++)
        {
            const size_t row = (size_t)i * dim;
            size_t j, k;

            for (j = 0; j < dim; j++)
            {
                /* Dot product of row i of mat1 with column j of mat2. */
                double acc = 0;
                for (k = 0; k < dim; k++)
                    acc += mat1[row+k] * mat2[k*dim+j];
                sol[row+j] = acc;
            }
        }

        /* Clock STOP. */
        finish = omp_get_wtime();
        elapsed = finish - start;

        if(elapsed<=0.)
        {
            printf("\nNot enough precission.\n");
        }
        else
        {
            /* mflops = (operations/time)/1000000, with two floating point
               operations per innermost iteration: 2 * mat_size^3 in total. */
            mflops = ((double)mat_size2*(2.*(double)mat_size)/elapsed)/1000000.;
            printf("\n>>> Threads = %d\t\tSize = %d\t\tSeconds = %.6lf", nprocs, mat_size, elapsed);
            printf("\n> Mflops = %.6f\t\tMflops/thread = %.6f",mflops,mflops/nprocs);
            printf("\n> Precision omp_get_wtick = %lf\n",omp_get_wtick());
        }

        free(mat1);
        free(mat2);
        free(sol);
    }

    return EXIT_SUCCESS;
}