int main(void) { const int size[NSTEPS] = {500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000}; // specification of array sizes static const double epsilon = 0.00001; // maximum floating point error allowed between doubles to be considered equal const double flops_per_iteration = 2.0; int i; // index variables for looping double time[2]; // elapsed time. 0 is test case, 1 is base case int mflops[2]; // calculated mflops. 0 is test case, 1 is base case double* a; double* b; double* ctest; double* cbase; stopwatch* sw = stopwatch_new(); for(i = 0; i < NSTEPS; i++) { int n = size[i]; int n2 = n * n; a = (double*) malloc(n2*sizeof(double)); b = (double*) malloc(n2*sizeof(double)); ctest = (double*) malloc(n2*sizeof(double)); cbase = (double*) malloc(n2*sizeof(double)); rand_square_double_matrix(i+10, n, a); rand_square_double_matrix(i+11, n, b); zero_square_double_matrix(n, ctest); zero_square_double_matrix(n, cbase); stopwatch_restart(sw); my_dgemm(n, a, b, ctest); stopwatch_stop(sw); time[0] = stopwatch_time(sw); stopwatch_restart(sw); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, a, n, b, n, 1.0, cbase, n); stopwatch_stop(sw); //printf("A\n"); //print_square_matrix(n, a); //printf("B\n"); //print_square_matrix(n, b); //printf("my_dgemm\n"); //print_square_matrix(n, ctest); //printf("cdgemm\n"); //print_square_matrix(n, cbase); assert_equal(n, ctest, cbase, epsilon); time[1] = stopwatch_time(sw); mflops[0] = calc_mflops(flops_per_iteration, n, time[0]); mflops[1] = calc_mflops(flops_per_iteration, n, time[1]); printf("%d, %5.2f, %d, %5.2f, %d\n", n, time[0], mflops[0], time[1], mflops[1]); free(a); free(b); free(ctest); free(cbase); } stopwatch_delete(sw); return 0; }
/*
 * Logs the time elapsed since `start_time` in milliseconds and frees
 * both timestamps.
 *
 * start_time: timestamp taken when the request began. It is passed to
 *             free() here, so the caller must not use it afterwards.
 *
 * The current time comes from stopwatch_time(), which is defined
 * elsewhere; its result is also passed to free() here.
 * NOTE(review): presumably stopwatch_time() returns a heap-allocated
 * timestamp and may return NULL on failure -- confirm.
 *
 * If either timestamp is NULL nothing is logged. A negative interval
 * (clock stepped backwards) is reported as 0 ms.
 */
void stopwatch_stop(struct timeval *start_time)
{
    struct timeval *end_time = stopwatch_time();

    if (start_time != NULL && end_time != NULL) {
        // Work in whole microseconds and convert once, so a negative
        // tv_usec difference borrows correctly from the seconds part
        // instead of being truncated toward zero on its own.
        long long usec =
            ((long long)end_time->tv_sec - (long long)start_time->tv_sec) * 1000000LL
            + ((long long)end_time->tv_usec - (long long)start_time->tv_usec);

        if (usec < 0) {
            usec = 0;
        }

        unsigned long msec = (unsigned long)(usec / 1000);
        info("request completed in %lu ms", msec);
    }

    // free(NULL) is a no-op, so both frees are safe on every path.
    free(start_time);
    free(end_time);
}