int main(void) { const int size[NSTEPS] = {500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000}; // specification of array sizes static const double epsilon = 0.00001; // maximum floating point error allowed between doubles to be considered equal const double flops_per_iteration = 2.0; int i; // index variables for looping double time[2]; // elapsed time. 0 is test case, 1 is base case int mflops[2]; // calculated mflops. 0 is test case, 1 is base case double* a; double* b; double* ctest; double* cbase; stopwatch* sw = stopwatch_new(); for(i = 0; i < NSTEPS; i++) { int n = size[i]; int n2 = n * n; a = (double*) malloc(n2*sizeof(double)); b = (double*) malloc(n2*sizeof(double)); ctest = (double*) malloc(n2*sizeof(double)); cbase = (double*) malloc(n2*sizeof(double)); rand_square_double_matrix(i+10, n, a); rand_square_double_matrix(i+11, n, b); zero_square_double_matrix(n, ctest); zero_square_double_matrix(n, cbase); stopwatch_restart(sw); my_dgemm(n, a, b, ctest); stopwatch_stop(sw); time[0] = stopwatch_time(sw); stopwatch_restart(sw); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, a, n, b, n, 1.0, cbase, n); stopwatch_stop(sw); //printf("A\n"); //print_square_matrix(n, a); //printf("B\n"); //print_square_matrix(n, b); //printf("my_dgemm\n"); //print_square_matrix(n, ctest); //printf("cdgemm\n"); //print_square_matrix(n, cbase); assert_equal(n, ctest, cbase, epsilon); time[1] = stopwatch_time(sw); mflops[0] = calc_mflops(flops_per_iteration, n, time[0]); mflops[1] = calc_mflops(flops_per_iteration, n, time[1]); printf("%d, %5.2f, %d, %5.2f, %d\n", n, time[0], mflops[0], time[1], mflops[1]); free(a); free(b); free(ctest); free(cbase); } stopwatch_delete(sw); return 0; }
int main(void) { fill_mat((double*)a); // print_mat((double*)a); stopwatch_restart(); // assume L[i][i] == 1 // #pragma omp parallel for // for (size_t j = 0; j < N; j++){ // for (size_t i = 0; i < N; i++){ // if(i <= j){ // double sum = 0; // for (int k = 0; k < i; k++) // sum += a[i][k] * a[k][j]; // a[i][j] -= sum; // } // if(i > j){ // double sum = 0; // for (int k = 0; k < j; k++) // sum += a[i][k] * a[k][j]; // a[i][j] = (a[i][j] - sum) / a[j][j]; // } // } // } // //LU-decomposition based on Gaussian Elimination // - Arranged so that the multiplier doesn't have to be computed multiple times for(int k = 0; k < N-1; k++){ //iterate over rows/columns for elimination // The "multiplier" is the factor by which a row is multiplied when // being subtracted from another row. for(int row = k + 1; row < N; row++){ // the multiplier only depends on (k,row), // it is invariant with respect to col double factor = a[row][k]/a[k][k]; //Eliminate entries in sub (subtract rows) for(int col = k + 1; col < N; col++){ //column a[row][col] = a[row][col] - factor*a[k][col]; } a[row][k] = factor; } } printf("time = %llu us\n", (long long unsigned)stopwatch_record()); // print_mat((double*)a); return 0; }
int main(int argc, char **argv) { // if (argc != 2) { // printf("usage: n\n"); // return -1; // } //int nnn = atoi(argv[1]); //int n = 1 << nnn; int n = N; srand(time(0)); Matrix A, B, C; A.width = A.height = n; A.elements = (float *)malloc(sizeof(float) * n * n); B.width = B.height = n; B.elements = (float *)malloc(sizeof(float) * n * n); C.width = C.height = n; C.elements = (float *)malloc(sizeof(float) * n * n); fill_Matrix(A); //print_Matrix(A); //printf("\n"); fill_Matrix(B); //print_Matrix(B); //printf("\n"); stopwatch_restart(); MatMul(A, B, C); printf("time = %llu us \n", (long long unsigned)stopwatch_record()); //print_Matrix(C); }