int main(int argc, char* argv[]) { int i, j; double tstart, tstop; double nflop; double tmmult, tdgemm; for( i=0; i<SIZE_M; i++ ) { for( j=0; j<SIZE_N; j++ ) { A[i][j]=(double)(i)+(double)(j); } } for( i=0; i<SIZE_N; i++ ) { for( j=0; j<SIZE_K; j++ ) { B[i][j]=(double)(i)+(double)(j); } } nflop = 2.0*(double)SIZE_M*(double)SIZE_N*(double)SIZE_K; MYTIMESTAMP(tstart); mmult( A, B, C); MYTIMESTAMP(tstop); tmmult = tstop-tstart; MYTIMESTAMP(tstart); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, SIZE_M, SIZE_N, SIZE_K, 1.0, (const double*)A, SIZE_M, (const double*)B, SIZE_N, 0.0, (double*)C, SIZE_K); MYTIMESTAMP(tstop); tdgemm = tstop-tstart; fprintf(stderr, "#M,N,K,tmmult,tdgemm,gflops_mmult,gflops_dgemm\n"); fprintf(stderr, "%d,%d,%d,%f,%f,%f,%f\n", SIZE_M, SIZE_N, SIZE_K, tmmult, tdgemm, 1.0e-6*nflop/tmmult, 1.0e-6*nflop/tdgemm); executeUnloopMethod(nflop, mmult_unroll4, "Unloop 4"); executeUnloopMethod(nflop, mmult_unroll8, "Unloop 8"); executeUnloopMethod(nflop, mmult_unroll16, "Unloop 16"); executeUnloopMethod(nflop, mmult_unroll24, "Unloop 24"); executeUnloopMethod(nflop, mmult_unroll28, "Unloop 28"); executeUnloopMethod(nflop, mmult_unroll222, "Unloop 222"); return 0; }
/*
 * Times one matrix-multiply variant on the file-level matrices A, B and C
 * and reports the result on stderr.
 *
 * nflops       - floating-point operation count used for the rate figure
 * unrollMethod - kernel to run; called as unrollMethod(A, B, C)
 * name         - label printed in front of the measurements
 *
 * When CHECK == 1 the label is printed again and compare(C) is called.
 */
void executeUnloopMethod(double nflops, void (*unrollMethod) (double (*)[SIZE_N], double (*)[SIZE_N], double (*)[SIZE_N]), char *name)
{
    double tstart, tstop, tmmult;

    MYTIMESTAMP(tstart);
    unrollMethod(A, B, C);
    MYTIMESTAMP(tstop);
    tmmult = tstop - tstart;

    /*
     * The figure is labelled gflops, so scale flop/s by 1e-9 (the previous
     * 1e-6 factor produced Mflop/s). Assumes MYTIMESTAMP reports seconds,
     * matching the "sec" label below -- TODO confirm.
     */
    fprintf(stderr, "%s: %fsec, %fgflops\n", name, tmmult, 1.0e-9 * nflops / tmmult);

    if (CHECK == 1) {
        fprintf(stderr, "%s\t", name);
        compare(C);
    }
}
int main(int argc, char *argv[]) { long i; long count = 0; double tstart, tstop, time; //printf ("Max Threads: %d\n", omp_get_max_threads()); //printf ("Num Threads: %d\n", omp_get_num_threads()); MYTIMESTAMP(tstart); #pragma omp parallel for reduction(+: count) for (i = 2; i <= NUM_ITERATIONS; i++) { count += isprime(i); } MYTIMESTAMP(tstop); time = tstop-tstart; printf("prime count = %ld\n", count); printf("Time: %.*f\n", 2, time); return 0; }
int main(int argc, char** argv) { if (argc != 2) { fprintf(stderr, "Usage: avg num_elements_per_proc\n"); exit(1); } double start, stop; int num_elements_per_proc = atoi(argv[1]); // Seed the random number generator to get different results each time srand(time(NULL)); MYTIMESTAMP(start); MPI_Init(NULL, NULL); int world_rank; MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); int world_size; MPI_Comm_size(MPI_COMM_WORLD, &world_size); // Create a random array of elements on all processes. srand(world_rank); float *rand_nums = NULL; rand_nums = create_rand_nums(num_elements_per_proc); // Sum the numbers locally float local_sum = 0; int i; for (i = 0; i < num_elements_per_proc; i++) { local_sum += rand_nums[i]; } // Reduce all of the local sums into the global sum in order to // calculate the mean float global_sum; MPI_Allreduce(&local_sum, &global_sum, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); float mean = global_sum / (num_elements_per_proc * world_size); // Compute the local sum of the squared differences from the mean float local_sq_diff = 0; for (i = 0; i < num_elements_per_proc; i++) { local_sq_diff += (rand_nums[i] - mean) * (rand_nums[i] - mean); } // Reduce the global sum of the squared differences to the root process // and print off the answer float global_sq_diff; MPI_Reduce(&local_sq_diff, &global_sq_diff, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD); // The standard deviation is the square root of the mean of the squared // differences. if (world_rank == 0) { float stddev = sqrt(global_sq_diff / (num_elements_per_proc * world_size)); printf("Mean - %f, Standard deviation = %f\n", mean, stddev); } // Clean up free(rand_nums); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); if (world_rank == 0) { MYTIMESTAMP(stop); printf("number of seconds: %f\n", stop - start); } }