示例#1
0
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using recursive tiling.
 *
 * Input
 *   int argc        - length of argv[] array
 *   char* argv[]    - pointer to command line parameter array
 *   int verbosity   - program verification: verbosity > 0 gives more output
 *   char* order     - string indicating loop order, e.g., "ijk" or "jki"
 *
 * Output
 *   double          - elapsed time for product computation
 */
double multiply_by_recursive_blocks( int argc, char* argv[], int verbosity,
                                     char* order )
{
    int rows, cols, mids, block_size;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    block_size = atoi( argv[3] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "Recursive blocks(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block size = %d\n", block_size );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product
     */
    t1 = wtime();
    mm_rec( c, a, b, 0, 0, 0, 0, 0, 0, rows, mids, cols, cols, block_size );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "blocks(%3s): %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %6d )\n", rows, mids, cols, block_size );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return t2 - t1;
}
int main() {
    int n        = 4;
    int **matrix = allocateMatrix(n);

    fillInMatrixValues(matrix);

    Submatrix submatrix = computeLargestSumSubmatrix(matrix, n);

    printf("Largest sum submatrix: \nx1 = %d\ny1 = %d\nx2 = %d\ny2 = %d\n", submatrix.startX, submatrix.startY, submatrix.stopX, submatrix.stopY);

    deallocateMatrix(matrix, n);

    return 0;
}
示例#3
0
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using tiling.  The loop order used for the tile
 * products is specified in string variable "mode".
 *
 * Input
 *   int argc        - length of argv[] array
 *   char* argv[]    - pointer to command line parameter array
 *   int verbosity   - program verification: verbosity > 0 gives more output
 *   char* order     - string indicating loop order, e.g., "ijk" or "jki"
 *
 * Output
 *   double          - elapsed time for product computation
 */
double multiply_by_tiles( int argc, char* argv[], int verbosity, char* order )
{
    int rows, cols, mids;
    int rows_per_tile, cols_per_tile, mids_per_tile;
    int row_start, row_end;
    int col_start, col_end;
    int mid_start, mid_end;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    rows_per_tile = atoi( argv[3] );
    mids_per_tile = atoi( argv[4] );
    cols_per_tile = atoi( argv[5] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "Tiles(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block rows = %d, mids = %d, columns = %d\n",
                rows_per_tile, mids_per_tile, cols_per_tile );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product
     */
    t1 = wtime();
    for ( row_start = 0; row_start < rows; row_start += rows_per_tile )
    {
        row_end = row_start + rows_per_tile - 1;
        if ( row_end >= rows ) row_end = rows - 1;
        for ( col_start = 0; col_start < cols; col_start += cols_per_tile )
        {
            col_end = col_start + cols_per_tile - 1;
            if ( col_end >= cols ) col_end = cols - 1;
            for ( mid_start = 0; mid_start < mids; mid_start += mids_per_tile )
            {
                mid_end = mid_start + mids_per_tile - 1;
                if ( mid_end >= mids ) mid_end = mids - 1;
                do_product( a, b, c, row_start, row_end, col_start,
                            col_end, mid_start, mid_end );
            }
        }
    }
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "tiles(%3s):  %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %4d x %4d x %4d )\n",
            rows, mids, cols, rows_per_tile, mids_per_tile,
            cols_per_tile );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return t2 - t1;
}
示例#4
0
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using BLAS routine DGEMM.
 *
 * Input
 *   int argc        - length of argv[] array
 *   char* argv[]    - pointer to command line parameter array
 *   int verbosity   - program verification: verbosity > 0 gives more output
 *
 * Output
 *   double          - elapsed time for product computation
 */
double multiply_by_blas( int argc, char* argv[], int verbosity )
{
    int rows, cols, mids;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "BLAS: rows = %d, mids = %d, columns = %d\n",
                rows, mids, cols );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product: There is an implicit matrix transpose when
     * passing from Fortran to C and vice-versa.  To compute C :=
     * alpha * A * B + beta * C we use dgemm() to compute C' := alpha
     * * B' * A' + beta * C'.  The first two arguments to dgemm() are
     * 'N' indicating we don't want a transpose in addition to the
     * implicit one.  The matrices A and B are passed in reverse order
     * so dgemm() receives (after the implicit transpose) B' and A'.
     * Arguments 3 and 4 are the dimensions of C' and argument 5 is
     * the column dimension of B' (and the row dimension of A').
     */
    t1 = wtime();
    dgemm( 'N', 'N', cols, rows, mids, 1.0, &b[0][0], cols, &a[0][0], mids, 
           0.0, &c[0][0], cols );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "BLAS:        %6.3f secs %6.3f gflops ( %5d x %5d x %5d )\n",
            sec, gflop_count / sec, rows, mids, cols );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return t2 - t1;
}