/*
 * Benchmark driver for the vector kernel.
 *
 * Allocates two N-element float vectors, fills the first with ones, calls
 * the selected kernel NREPS times and prints the mean wall-clock time of a
 * single call.
 *
 * Returns 0 on success, 1 if either vector cannot be allocated.
 */
int main(int argc, char **argv)
{
    int i;
    float *a, *b;
    double t;

    /* Command-line arguments are not used by this driver. */
    (void)argc;
    (void)argv;

    /* Both vectors are requested with 16-byte alignment. */
    a = (float *)_mm_malloc(sizeof(*a) * N, 16);
    b = (float *)_mm_malloc(sizeof(*b) * N, 16);
    if (a == NULL || b == NULL) {
        fprintf(stderr, "Cannot allocate vectors\n");
        if (a != NULL) {
            _mm_free(a);
        }
        if (b != NULL) {
            _mm_free(b);
        }
        return 1;
    }

    /* Only a is initialised here; b is handed to the kernel as-is
     * (presumably the kernel writes it -- TODO confirm). */
    for (i = 0; i < N; i++) {
        a[i] = 1.0f;
    }

    t = hpctimer_getwtime();
    for (i = 0; i < NREPS; i++) {
        fun_def(a, b, N);
        // fun_sse(a, b, N);
    }
    t = hpctimer_getwtime() - t;
    t = t / NREPS; /* mean time per call */

    //print_vec(b, N);

    printf("Elapsed time: %.6f sec.\n", t);

    _mm_free(a);
    _mm_free(b);
    return 0;
}
/*
 * Benchmark driver for copying an array of struct Foo field by field.
 *
 * Allocates two arrays of TN structures, zeroes both completely, then times
 * an element-wise copy of the six member arrays (arr, arr1 .. arr5) from Bar
 * into Bar1 and prints T_SIZE together with the elapsed time.
 *
 * Exits with EXIT_FAILURE if either allocation fails; returns 0 otherwise.
 */
int main(void)
{
    struct Foo *Bar;
    struct Foo *Bar1;
    size_t size; /* byte count of one whole array; size_t avoids int truncation */
    double t;
    int i, j;

    size = sizeof(struct Foo) * TN;

    if ((Bar = malloc(size)) == NULL) {
        fprintf(stderr, "No enough memory\n");
        exit(EXIT_FAILURE);
    }
    if ((Bar1 = malloc(size)) == NULL) {
        fprintf(stderr, "No enough memory\n");
        free(Bar);
        exit(EXIT_FAILURE);
    }

    /* Clear the whole arrays, not just the first element: the copy loop
     * below reads every one of the TN source elements. */
    memset(Bar, 0, size);
    memset(Bar1, 0, size);

    t = hpctimer_getwtime();
    for (i = 0; i < TN; i++) {
        for (j = 0; j < T_SIZE; j++) {
            Bar1[i].arr[j] = Bar[i].arr[j];
            Bar1[i].arr1[j] = Bar[i].arr1[j];
            Bar1[i].arr2[j] = Bar[i].arr2[j];
            Bar1[i].arr3[j] = Bar[i].arr3[j];
            Bar1[i].arr4[j] = Bar[i].arr4[j];
            Bar1[i].arr5[j] = Bar[i].arr5[j];
        }
    }
    t = hpctimer_getwtime() - t;

    printf("T_SIZE: %d\nElapsed time (sec.): %.6f\n", T_SIZE, t);

    free(Bar);
    free(Bar1);
    return 0;
}
/*
 * Benchmark driver for the reduction kernel.
 *
 * Allocates an N-element float vector filled with ones, calls the selected
 * reduction NREPS times, then prints the last result next to N (the value
 * the sum of N ones should have) and the mean wall-clock time of one call.
 *
 * Returns 0 on success, 1 if the vector cannot be allocated.
 */
int main(void)
{
    int i;
    float *a;
    double t;
    double sum = 0.0; /* defined even if the timing loop runs zero times */

    /* The vector is requested with 16-byte alignment. */
    a = (float *)_mm_malloc(sizeof(*a) * N, 16);
    if (a == NULL) {
        fprintf(stderr, "Not enough memory\n");
        return 1;
    }

    for (i = 0; i < N; i++) {
        a[i] = 1.0f;
    }

    t = hpctimer_getwtime();
    for (i = 0; i < NREPS; i++) {
        sum = reduction_sum(a, N);
        // sum = reduction_sum_sse(a, N);
    }
    t = (hpctimer_getwtime() - t) / NREPS; /* mean time per call */

    printf("Reduction sum: %.4f (real %.4f)\n", sum, (float)N);
    printf("Elapsed time: %.6f sec.\n", t);

    _mm_free(a);
    return 0;
}
/*
 * Approximates pi by summing 4 / (1 + x^2) at the midpoints of nsteps equal
 * sub-intervals of [0, 1] and multiplying the sum by the step width.
 *
 * argv[1], when given, is the number of steps (default 1000000). It must be
 * a positive integer; otherwise an error is printed and EXIT_FAILURE is
 * returned.
 *
 * Prints the approximation, its absolute error against a reference constant,
 * the run parameters and the elapsed wall-clock time.
 */
int main(int argc, char **argv)
{
    double PI25DT = 3.141592653589793238462643; /* reference value of pi */
    int i, nsteps;
    double pi, x, step, sum;
    double t;
    int nthreads = 1; /* stays 1 when built without OpenMP */

    nsteps = (argc > 1) ? atoi(argv[1]) : 1000000;
    if (nsteps <= 0) {
        /* A zero step count would make step infinite and the result NaN;
         * a negative one would silently skip the loop. */
        fprintf(stderr, "Number of steps must be a positive integer\n");
        return EXIT_FAILURE;
    }
    step = 1.0 / (double)nsteps;

    t = hpctimer_getwtime();
    sum = 0.0;

    #pragma omp parallel shared(sum)
    {
#ifdef _OPENMP
        /* Only the master thread records the team size. */
        #pragma omp master
        nthreads = omp_get_num_threads();
#endif

        #pragma omp for private(x)
        for (i = 1; i <= nsteps; i++) {
            x = (i - 0.5) * step; /* midpoint of the i-th sub-interval */

            /* Every update of the shared accumulator goes through a critical
             * section. NOTE(review): this serialises the accumulation; a
             * reduction(+:sum) clause is the usual faster alternative if the
             * critical section is not itself what is being measured. */
            #pragma omp critical
            sum = sum + 4.0 / (1.0 + x * x);
        }
    }

    pi = step * sum;
    t = hpctimer_getwtime() - t;

    printf("PI is approximately %.16f, Error is %.16f\n", pi, fabs(pi - PI25DT));
    printf("(nsteps = %d, step = %f, nthreads = %d)\n", nsteps, step, nthreads);
    printf("Elapsed time = %.6f sec.\n", t);

    return EXIT_SUCCESS;
}
/*
 * Benchmark driver for the matrix-multiplication kernels.
 *
 * Initialises A, B and C through init_matrix, calls the selected dgemm
 * variant NREPS times and prints the mean wall-clock time of one call.
 * Always returns 0.
 */
int main(int argc, char **argv)
{
    double started, elapsed;
    int rep;

    init_matrix(A, B, C, N);

    started = hpctimer_getwtime();
    rep = 0;
    while (rep < NREPS) {
        /* Exactly one variant is active; the others are kept for swapping in. */
        dgemm_def(A, B, C, N);
        //dgemm_transpose(A, B, C, N);
        //dgemm_transpose2(A, B, C, N);
        //dgemm_block(A, B, C, N);
        rep++;
    }
    elapsed = hpctimer_getwtime() - started;
    elapsed = elapsed / NREPS; /* mean time per call */

    /*print_matrix(C, N);*/

    printf("Elapsed time: %.6f sec.\n", elapsed);
    return 0;
}