/*---------------------------------------------------------------------------
 *
 * Compute matrix product using recursive tiling.
 *
 * Obtains three matrices from allocateMatrix() (A is rows x mids, B is
 * mids x cols, C is rows x cols), passes them to initialize_matrices(),
 * times a single top-level call to mm_rec(), prints a timing report on
 * stdout and hands the matrices back to deallocateMatrix().
 *
 * Input
 *   int argc       - length of argv[] array; at least 4 entries are needed
 *   char* argv[]   - pointer to command line parameter array:
 *                      argv[0] = rows, argv[1] = mids,
 *                      argv[2] = cols, argv[3] = block size
 *   int verbosity  - program verification: verbosity > 0 gives more output,
 *                    verbosity > 1 additionally prints a checksum of C
 *   char* order    - string indicating loop order, e.g., "ijk" or "jki";
 *                    in this routine it is only used to label the output
 *
 * Output
 *   double         - elapsed time for product computation, or -1.0 when
 *                    fewer than four parameters were supplied (nothing is
 *                    allocated or computed in that case)
 */
double multiply_by_recursive_blocks( int argc, char* argv[], int verbosity,
                                     char* order )
{
    int rows, cols, mids, block_size;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    if ( argc < 4 )
    {
        /* argv[0..3] are read below; refuse to index past the array */
        fprintf( stderr,
                 "Recursive blocks(%3s): need 4 parameters "
                 "(rows mids cols block_size), got %d\n",
                 order, argc );
        return -1.0;
    }

    rows       = atoi( argv[0] );
    mids       = atoi( argv[1] );
    cols       = atoi( argv[2] );
    block_size = atoi( argv[3] );

    /* 2.0 forces the whole product into double, so it cannot overflow int */
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "Recursive blocks(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block size = %d\n", block_size );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product; block_size is passed as mm_rec()'s threshold
     */
    t1 = wtime();
    mm_rec( c, a, b, 0, 0, 0, 0, 0, 0, rows, mids, cols, cols, block_size );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
    {
        printf( "checksum = %f\n", checksum( c, rows, cols ) );
    }

    printf( "blocks(%3s): %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %6d )\n", rows, mids, cols, block_size );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return sec;
}
int main(int argc, char** argv) { #pragma omp parallel { #pragma omp for for (int i=0; i<N; i++) { for (int j=0; j<M; j++) { A[i][j] = i*j; } } // B is the identity matrix #pragma omp for for (int i=0; i<M; i++) { for (int j=0; j<K; j++) { B[i][j] = (i==j)?1:0; } } // process recursively mm_rec(); } // verify result int success = 1; for (int i=0; i<N; i++) { for (int j=0; j<MIN(M,K); j++) { if (A[i][j] != C[i][j]) { success = 0; } } for (int j=MIN(M,K); j<MAX(M,K); j++) { if (C[i][j] != 0) { success = 0; } } } // print verification result printf("Verification: %s\n", (success)?"OK":"ERR"); }
/*---------------------------------------------------------------------------
 *
 * Computes block-oriented matrix-matrix product recursively.
 *
 * While the current block of B holds more than `threshold` elements
 * (m * n) and can still be split, each of the three block dimensions is
 * halved and the routine calls itself on all eight (i, j, k) combinations
 * of sub-blocks.  Otherwise the block is handed to do_product().
 *
 * Input:
 *   double** c         - matrix product C = A * B
 *   double** a         - first factor of product
 *   double** b         - second factor of product
 *   int crow, ccol     - starting row and column of block of C
 *   int arow, acol     - starting row and column of block of A
 *   int brow, bcol     - starting row and column of block of B
 *   int l, m, n        - dims of blocks: A is l x m, B is m x n, C is l x n
 *   int N              - full row length (column dimension) of matrix B;
 *                        only forwarded to the recursive calls here
 *   int threshold      - B blocks with more than this many elements are
 *                        partitioned
 *
 * Output:
 *   double** c         - matrix product C = A * B
 *
 * Algorithm based on one presented on page 276 of "Parallel Programming in
 * C with MPI and OpenMP", Michael J. Quinn, McGraw-Hill, 2004.
 *
 * **** NOTE ****: There is a typo in Quinn's code in the recursive call
 * to mm_rec().  The 5th parameter should be "ccol + nhalf[j]" and not
 * use "mhalf" as shown in the text.  The error only shows up when the
 * dimensions of the matrices are not uniform.
 */
void mm_rec( double** c, double** a, double** b,
             int crow, int ccol, int arow, int acol, int brow, int bcol,
             int l, int m, int n, int N, int threshold )
{
    int lhalf[3], mhalf[3], nhalf[3];
    int i, j, k;

    /*
     * The element count is formed in long long so that m * n cannot
     * overflow int for large blocks.  The (m > 1 || n > 1) test stops the
     * recursion once the B block can no longer shrink: without it a
     * threshold below 1 makes a 1 x 1 block recurse into an identical
     * 1 x 1 sub-block forever.
     */
    if ( (long long) m * n > threshold && ( m > 1 || n > 1 ) )
    {
        /*
         * For each dimension: [0] is the offset of the first half (0),
         * [1] is both the size of the first half and the offset of the
         * second half, [2] is the size of the second half.  Hence
         * xhalf[idx] is an offset and xhalf[idx + 1] the matching size.
         */
        lhalf[0] = 0;
        lhalf[1] = l / 2;
        lhalf[2] = l - lhalf[1];

        mhalf[0] = 0;
        mhalf[1] = m / 2;
        mhalf[2] = m - mhalf[1];

        nhalf[0] = 0;
        nhalf[1] = n / 2;
        nhalf[2] = n - nhalf[1];

        for ( i = 0; i < 2; i++ )
        {
            for ( j = 0; j < 2; j++ )
            {
                for ( k = 0; k < 2; k++ )
                {
                    mm_rec( c, a, b,
                            crow + lhalf[i], ccol + nhalf[j],
                            arow + lhalf[i], acol + mhalf[k],
                            brow + mhalf[k], bcol + nhalf[j],
                            lhalf[i + 1], mhalf[k + 1], nhalf[j + 1],
                            N, threshold );
                }
            }
        }
    }
    else
    {
        /*
         * do_product() receives three pairs of first/last indices
         * (start, start + size - 1): the A row range, the B column range
         * and the A column range.
         */
        do_product( a, b, c,
                    arow, arow + l - 1,
                    bcol, bcol + n - 1,
                    acol, acol + m - 1 );
    }
}