/*---------------------------------------------------------------------------
 *
 * Compute matrix product using recursive tiling.
 *
 * The three matrices are allocated, initialized, multiplied with mm_rec()
 * and released inside this function; only the mm_rec() call is timed.
 *
 * Input
 *   int argc        - length of argv[] array; at least 4 entries are needed
 *   char* argv[]    - pointer to command line parameter array
 *                       argv[0] = rows  (first dimension of a and c)
 *                       argv[1] = mids  (second dimension of a, first of b)
 *                       argv[2] = cols  (second dimension of b and c)
 *                       argv[3] = block size handed to mm_rec()
 *   int verbosity   - program verification: verbosity > 0 gives more output,
 *                     verbosity > 1 additionally prints a checksum of c
 *   char* order     - string indicating loop order, e.g., "ijk" or "jki";
 *                     in this function it only labels the printed output
 *
 * Output
 *   double          - elapsed time for product computation, or -1.0 when
 *                     the arguments are missing/invalid or an allocation
 *                     failed (a diagnostic is written to stderr)
 */
double multiply_by_recursive_blocks( int argc, char* argv[], int verbosity,
                                     char* order )
{
    int rows, cols, mids, block_size;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    if ( argc < 4 )
    {
        fprintf( stderr,
                 "Recursive blocks(%3s): need 4 arguments "
                 "(rows mids cols block_size), got %d\n",
                 order, argc );
        return -1.0;
    }

    rows       = atoi( argv[0] );
    mids       = atoi( argv[1] );
    cols       = atoi( argv[2] );
    block_size = atoi( argv[3] );

    /*
     * atoi() yields 0 when no number can be parsed, so rejecting
     * non-positive values also catches malformed arguments.
     */
    if ( rows <= 0 || mids <= 0 || cols <= 0 || block_size <= 0 )
    {
        fprintf( stderr,
                 "Recursive blocks(%3s): dimensions and block size must be "
                 "positive (rows = %d, mids = %d, columns = %d, "
                 "block size = %d)\n",
                 order, rows, mids, cols, block_size );
        return -1.0;
    }

    /* 2 flops (multiply + add) per inner-product term, in units of 1e9 */
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "Recursive blocks(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block size = %d\n", block_size );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );

    /*
     * NOTE(review): assumes allocateMatrix() reports failure by returning
     * NULL -- confirm against its definition.
     */
    if ( a == NULL || b == NULL || c == NULL )
    {
        fprintf( stderr, "Recursive blocks(%3s): matrix allocation failed\n",
                 order );
        if ( a != NULL ) deallocateMatrix( a );
        if ( b != NULL ) deallocateMatrix( b );
        if ( c != NULL ) deallocateMatrix( c );
        return -1.0;
    }

    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product
     */
    t1 = wtime();
    mm_rec( c, a, b, 0, 0, 0, 0, 0, 0, rows, mids, cols, cols, block_size );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
    {
        printf( "checksum = %f\n", checksum( c, rows, cols ) );
    }
    printf( "blocks(%3s): %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %6d )\n", rows, mids, cols, block_size );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return sec;
}
/*
 * Program entry point.
 *
 * Obtains a matrix from allocateMatrix() for size 4, populates it with
 * fillInMatrixValues(), asks computeLargestSumSubmatrix() for a result and
 * prints the start/stop coordinates stored in the returned Submatrix.
 * The matrix is released before returning 0.
 */
int main()
{
    int size = 4;
    int **grid;
    Submatrix best;

    grid = allocateMatrix(size);
    fillInMatrixValues(grid);

    best = computeLargestSumSubmatrix(grid, size);

    printf("Largest sum submatrix: \nx1 = %d\ny1 = %d\nx2 = %d\ny2 = %d\n",
           best.startX,
           best.startY,
           best.stopX,
           best.stopY);

    deallocateMatrix(grid, size);

    return 0;
}
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using tiling.  The product is built up from tile
 * products, each computed by one call to do_product() on a sub-range of
 * rows, columns and mids.  Tiles at the edge of a matrix are clipped to the
 * matrix boundary, so tile sizes need not divide the matrix dimensions.
 *
 * The three matrices are allocated, initialized, multiplied and released
 * inside this function; only the tile loop is timed.
 *
 * Input
 *   int argc        - length of argv[] array; at least 6 entries are needed
 *   char* argv[]    - pointer to command line parameter array
 *                       argv[0] = rows  (first dimension of a and c)
 *                       argv[1] = mids  (second dimension of a, first of b)
 *                       argv[2] = cols  (second dimension of b and c)
 *                       argv[3] = rows per tile
 *                       argv[4] = mids per tile
 *                       argv[5] = columns per tile
 *   int verbosity   - program verification: verbosity > 0 gives more output,
 *                     verbosity > 1 additionally prints a checksum of c
 *   char* order     - string indicating loop order, e.g., "ijk" or "jki";
 *                     in this function it only labels the printed output and
 *                     is not passed on to do_product()
 *
 * Output
 *   double          - elapsed time for product computation, or -1.0 when
 *                     the arguments are missing/invalid or an allocation
 *                     failed (a diagnostic is written to stderr)
 */
double multiply_by_tiles( int argc, char* argv[], int verbosity, char* order )
{
    int rows, cols, mids;
    int rows_per_tile, cols_per_tile, mids_per_tile;
    int row_start, row_end;
    int col_start, col_end;
    int mid_start, mid_end;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    if ( argc < 6 )
    {
        fprintf( stderr,
                 "Tiles(%3s): need 6 arguments (rows mids cols "
                 "rows_per_tile mids_per_tile cols_per_tile), got %d\n",
                 order, argc );
        return -1.0;
    }

    rows          = atoi( argv[0] );
    mids          = atoi( argv[1] );
    cols          = atoi( argv[2] );
    rows_per_tile = atoi( argv[3] );
    mids_per_tile = atoi( argv[4] );
    cols_per_tile = atoi( argv[5] );

    /*
     * atoi() yields 0 when no number can be parsed, so rejecting
     * non-positive values also catches malformed arguments.
     */
    if ( rows <= 0 || mids <= 0 || cols <= 0 )
    {
        fprintf( stderr,
                 "Tiles(%3s): matrix dimensions must be positive "
                 "(rows = %d, mids = %d, columns = %d)\n",
                 order, rows, mids, cols );
        return -1.0;
    }

    /*
     * A tile size of zero would keep the loop counters below from ever
     * advancing, and a negative one would push them below zero.
     */
    if ( rows_per_tile <= 0 || mids_per_tile <= 0 || cols_per_tile <= 0 )
    {
        fprintf( stderr,
                 "Tiles(%3s): tile sizes must be positive "
                 "(rows = %d, mids = %d, columns = %d)\n",
                 order, rows_per_tile, mids_per_tile, cols_per_tile );
        return -1.0;
    }

    /* 2 flops (multiply + add) per inner-product term, in units of 1e9 */
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "Tiles(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block rows = %d, mids = %d, columns = %d\n",
                rows_per_tile, mids_per_tile, cols_per_tile );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );

    /*
     * NOTE(review): assumes allocateMatrix() reports failure by returning
     * NULL -- confirm against its definition.
     */
    if ( a == NULL || b == NULL || c == NULL )
    {
        fprintf( stderr, "Tiles(%3s): matrix allocation failed\n", order );
        if ( a != NULL ) deallocateMatrix( a );
        if ( b != NULL ) deallocateMatrix( b );
        if ( c != NULL ) deallocateMatrix( c );
        return -1.0;
    }

    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product: visit every (row, column, mid) tile; the *_end
     * indices are inclusive and clipped to the last valid index.
     */
    t1 = wtime();
    for ( row_start = 0; row_start < rows; row_start += rows_per_tile )
    {
        row_end = row_start + rows_per_tile - 1;
        if ( row_end >= rows ) row_end = rows - 1;

        for ( col_start = 0; col_start < cols; col_start += cols_per_tile )
        {
            col_end = col_start + cols_per_tile - 1;
            if ( col_end >= cols ) col_end = cols - 1;

            for ( mid_start = 0; mid_start < mids;
                  mid_start += mids_per_tile )
            {
                mid_end = mid_start + mids_per_tile - 1;
                if ( mid_end >= mids ) mid_end = mids - 1;

                do_product( a, b, c, row_start, row_end, col_start, col_end,
                            mid_start, mid_end );
            }
        }
    }
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
    {
        printf( "checksum = %f\n", checksum( c, rows, cols ) );
    }
    printf( "tiles(%3s): %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %4d x %4d x %4d )\n",
            rows, mids, cols, rows_per_tile, mids_per_tile, cols_per_tile );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return sec;
}
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using BLAS routine DGEMM.
 *
 * The three matrices are allocated, initialized, multiplied and released
 * inside this function; only the dgemm() call is timed.
 *
 * Input
 *   int argc        - length of argv[] array; at least 3 entries are needed
 *   char* argv[]    - pointer to command line parameter array
 *                       argv[0] = rows  (first dimension of a and c)
 *                       argv[1] = mids  (second dimension of a, first of b)
 *                       argv[2] = cols  (second dimension of b and c)
 *   int verbosity   - program verification: verbosity > 0 gives more output,
 *                     verbosity > 1 additionally prints a checksum of c
 *
 * Output
 *   double          - elapsed time for product computation, or -1.0 when
 *                     the arguments are missing/invalid or an allocation
 *                     failed (a diagnostic is written to stderr)
 */
double multiply_by_blas( int argc, char* argv[], int verbosity )
{
    int rows, cols, mids;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    if ( argc < 3 )
    {
        fprintf( stderr,
                 "BLAS: need 3 arguments (rows mids cols), got %d\n", argc );
        return -1.0;
    }

    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );

    /*
     * atoi() yields 0 when no number can be parsed, so rejecting
     * non-positive values also catches malformed arguments.  Positive
     * sizes are also required because the dimensions are passed to dgemm()
     * as leading dimensions and element [0][0] of each matrix is
     * addressed below.
     */
    if ( rows <= 0 || mids <= 0 || cols <= 0 )
    {
        fprintf( stderr,
                 "BLAS: matrix dimensions must be positive "
                 "(rows = %d, mids = %d, columns = %d)\n",
                 rows, mids, cols );
        return -1.0;
    }

    /* 2 flops (multiply + add) per inner-product term, in units of 1e9 */
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "BLAS: rows = %d, mids = %d, columns = %d\n",
                rows, mids, cols );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );

    /*
     * NOTE(review): assumes allocateMatrix() reports failure by returning
     * NULL -- confirm against its definition.
     */
    if ( a == NULL || b == NULL || c == NULL )
    {
        fprintf( stderr, "BLAS: matrix allocation failed\n" );
        if ( a != NULL ) deallocateMatrix( a );
        if ( b != NULL ) deallocateMatrix( b );
        if ( c != NULL ) deallocateMatrix( c );
        return -1.0;
    }

    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product: There is an implicit matrix transpose when
     * passing from Fortran to C and vice-versa.  To compute C :=
     * alpha * A * B + beta * C we use dgemm() to compute C' := alpha *
     * B' * A' + beta * C'.  The first two arguments to dgemm() are
     * 'N' indicating we don't want a transpose in addition to the
     * implicit one.  The matrices A and B are passed in reverse order
     * so dgemm() receives (after the implicit transpose) B' and A'.
     * Arguments 3 and 4 are the dimensions of C' and argument 5 is
     * the column dimension of B' (and the row dimension of A').
     *
     * NOTE(review): passing &x[0][0] presumes each matrix is stored as one
     * contiguous block -- confirm against allocateMatrix().
     */
    t1 = wtime();
    dgemm( 'N', 'N', cols, rows, mids, 1.0, &b[0][0], cols,
           &a[0][0], mids, 0.0, &c[0][0], cols );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
    {
        printf( "checksum = %f\n", checksum( c, rows, cols ) );
    }
    printf( "BLAS: %6.3f secs %6.3f gflops ( %5d x %5d x %5d )\n",
            sec, gflop_count / sec, rows, mids, cols );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return sec;
}