/*
 * Benchmark driver for the matrix-multiplication variants.
 *
 * Allocates four SIZE x SIZE float matrices (a, b, c, c_cpu), initializes
 * a, b and c via mm_cpu_initialize(), runs the OpenACC variants (only when
 * the compiler defines _OPENACC), runs the CPU reference into c_cpu, prints
 * the wall-clock time of every stage and finally hands c and c_cpu to
 * check_results().
 *
 * Returns 0 on completion, 1 if any matrix could not be allocated.
 */
int main()
{
    double start, lap;
    float *a, *b, *c, *c_cpu;
    int i, j;

    /* bytes to be allocated for one matrix; sizeof comes first so the
     * whole product is evaluated in size_t rather than in SIZE's type */
    const size_t nBytes = sizeof(float) * SIZE * SIZE;

    a     = (float*)malloc(nBytes);
    b     = (float*)malloc(nBytes);
    c     = (float*)malloc(nBytes);
    c_cpu = (float*)malloc(nBytes);

    /* Bail out before any buffer is touched if an allocation failed. */
    if (a == NULL || b == NULL || c == NULL || c_cpu == NULL) {
        fprintf(stderr, "Allocation of matrix failed.\n");
        free(a);
        free(b);
        free(c);
        free(c_cpu);
        return 1;
    }

    /* Initialize matrices. */
    mm_cpu_initialize(a, b, c);

    lap = gtod();
    start = lap;

    /* Run OpenACC versions of the matrix multiplication */
#ifdef _OPENACC
    mm_oacc_kernel(a, b, c);
    printf("mm_oacc_kernel(): %lf sec \n", gtod()-lap);
    lap = gtod();

    mm_oacc_kernel_with_init(a, b, c);
    printf("mm_oacc_kernel_with_init(): %lf sec \n", gtod()-lap);
    lap = gtod();

    mm_oacc_parallel_with_init(a, b, c);
    printf("mm_oacc_parallel_with_init(): %lf sec \n", gtod()-lap);
#endif

    /* Initialize the CPU result matrix */
    for (i = 0; i < SIZE; ++i) {
        for (j = 0; j < SIZE; ++j) {
            c_cpu[i*SIZE + j] = 0.0f;
        }
    }

    /* The zero-fill above is included in neither the OpenACC nor the
     * CPU stage timing: the lap timer restarts here. */
    lap = gtod();

    /* Perform the matrix multiplication on the CPU */
    mm_cpu_compute(a, b, c_cpu);
    printf("MM on CPU: %lf sec \n", gtod()-lap);

    /* not necessary here, but if the async clause is used make sure
     * OpenACC tasks are finished */
#pragma acc wait

    printf("Total runtime: %lf sec \n", gtod()-start);

    check_results(c, c_cpu);

    /* Release the matrices now that the comparison is done. */
    free(a);
    free(b);
    free(c);
    free(c_cpu);

    return 0;
}
/*
 * Resolve __vdso_gettimeofday (symbol version "LINUX_2.6") in the vDSO
 * located through the AT_SYSINFO_EHDR auxiliary-vector entry, call it once
 * and print the returned time.
 *
 * Exit status:
 *   0 - the time was printed, or AT_SYSINFO_EHDR is absent (kept as in the
 *       original; presumably a deliberate "nothing to test" outcome --
 *       TODO confirm with the test harness)
 *   1 - the symbol could not be found, or the call itself failed
 */
int main(int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
    if (!sysinfo_ehdr) {
        printf("AT_SYSINFO_EHDR is not present!\n");
        return 0;
    }

    /* Reuse the value validated above instead of querying it again. */
    vdso_init_from_sysinfo_ehdr(sysinfo_ehdr);

    /* Find gettimeofday. */
    typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
    gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");

    if (!gtod) {
        printf("Could not find __vdso_gettimeofday\n");
        return 1;
    }

    struct timeval tv;
    long ret = gtod(&tv, 0);

    if (ret != 0) {
        /* Report the failure through the exit status as well, matching
         * the symbol-lookup failure path above. */
        printf("__vdso_gettimeofday failed\n");
        return 1;
    }

    printf("The time is %lld.%06lld\n",
           (long long)tv.tv_sec, (long long)tv.tv_usec);

    return 0;
}
/*
 * Naive square matrix-matrix multiply benchmark.
 *
 * For a growing sequence of dimensions (up to `max`) it builds A and B with
 * random_mat(), C with zero_mat(), times the triple-loop product
 * C += A * B over flat row-major arrays, and prints the runtime and the
 * resulting GFLOP/s (2 * dim^3 floating-point operations per product).
 *
 * Exits with EXIT_FAILURE if a matrix cannot be allocated, otherwise
 * returns EXIT_SUCCESS.
 */
int main( int argc, char** argv )
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    double t_start, t_end;
    double gflops;
    uint32_t dim  = 0;
    uint32_t step = 32;
    uint32_t max  = 1024;

    printf( "\nMatrix matrix multiply example:\n\n" );

    while ( dim < max ) {
        /* Grow the step as the matrices get larger. With max == 1024 only
         * the first three branches are reachable; the remaining ones
         * matter only if max is raised. */
        if ( dim < 256 ) {
            dim += step;
        } else if ( dim < 512 ) {
            dim += step*2;
        } else if ( dim < 1024 ) {
            dim += step*4;
        } else if ( dim < 2048 ) {
            dim += step*8;
        } else if ( dim < 4048 ) {
            dim += step*16;
        } else {
            dim += step*32;
        }

        double* A = random_mat( dim );
        double* B = random_mat( dim );
        double* C = zero_mat( dim );

        if ( A == NULL || B == NULL || C == NULL ) {
            printf( "Allocation of matrix failed.\n" );
            exit( EXIT_FAILURE );
        }

        t_start = gtod();

        uint32_t i_mult_dim, i_mult_dim_add_j, i, j, k;

        /* Begin matrix matrix multiply kernel */
        for ( i = 0; i < dim; i++ ) {
            i_mult_dim = i * dim;
            for ( j = 0; j < dim; j++ ) {
                /* Flat index of C[i][j]: row offset plus the column j.
                 * (Previously this added the not-yet-initialized k.) */
                i_mult_dim_add_j = i_mult_dim + j;
                for ( k = 0; k < dim; k++ ) {
                    // C[i][j] += A[i][k] * B[k][j]
                    C[ i_mult_dim_add_j ] += A[ i_mult_dim + k ] * B[ k * dim + j ];
                }
            }
        }
        /* End matrix matrix multiply kernel */

        t_end = gtod();

        /* 2 * dim^3 operations, evaluated in double to avoid integer
         * overflow, scaled to 10^9 operations per second. */
        gflops = ( ( double )2 * dim * dim * dim / 1000000000.0 ) / ( t_end - t_start );

        /* dim is unsigned, so print it with an unsigned conversion. */
        printf("Dim: %4u runtime: %7.4fs GFLOP/s: %0.2f\n", ( unsigned )dim, t_end - t_start, gflops );
        /* Alternative LaTeX table row output:
         * printf("%4d & %7.4fs & %0.2f \\\\ \n \\hline \n", dim, t_end - t_start, gflops ); */

        free( A );
        free( B );
        free( C );
    }

    printf("\n");

    return EXIT_SUCCESS;
}