예제 #1
0
/*---------------------------------------------------------------------------
 *
 * Compute matrix product using recursive tiling.
 *
 * Input
 *   int argc        - length of argv[] array
 *   char* argv[]    - pointer to command line parameter array
 *   int verbosity   - program verification: verbosity > 0 gives more output
 *   char* order     - string indicating loop order, e.g., "ijk" or "jki"
 *
 * Output
 *   double          - elapsed time for product computation
 */
double multiply_by_recursive_blocks( int argc, char* argv[], int verbosity,
                                     char* order )
{
    int rows, cols, mids, block_size;
    double **a, **b, **c;
    double t1, t2;
    double sec;
    double gflop_count;

    /*
     * process command line arguments
     */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    block_size = atoi( argv[3] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "Recursive blocks(%3s): rows = %d, mids = %d, columns = %d\n",
                order, rows, mids, cols );
        printf( "block size = %d\n", block_size );
    }

    /*
     * allocate and initialize matrices
     */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * compute product
     */
    t1 = wtime();
    mm_rec( c, a, b, 0, 0, 0, 0, 0, 0, rows, mids, cols, cols, block_size );
    t2 = wtime();
    sec = t2 - t1;

    if ( verbosity > 1 )
        printf( "checksum = %f\n", checksum( c, rows, cols ) );

    printf( "blocks(%3s): %6.3f secs %6.3f gflops ",
            order, sec, gflop_count / sec );
    printf( "( %5d x %5d x %5d ) ( %6d )\n", rows, mids, cols, block_size );

    /*
     * clean up
     */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return t2 - t1;
}
예제 #2
0
int main(int argc, char** argv)  {

	#pragma omp parallel
	{
               
		#pragma omp for
		for (int i=0; i<N; i++) {
			for (int j=0; j<M; j++) {
				A[i][j] = i*j;
			}
		}

		// B is the identity matrix

		#pragma omp for
		for (int i=0; i<M; i++) {
			for (int j=0; j<K; j++) {
				B[i][j] = (i==j)?1:0;
			}
		}

		// process recursively
		mm_rec();

	}

	// verify result
	int success = 1;	
	for (int i=0; i<N; i++) {
		for (int j=0; j<MIN(M,K); j++) {
			if (A[i][j] != C[i][j]) {
				success = 0;
			}
		}
		for (int j=MIN(M,K); j<MAX(M,K); j++) {
			if (C[i][j] != 0) {
				success = 0;
			}
		}
	}

	// print verification result
	printf("Verification: %s\n", (success)?"OK":"ERR");
}
예제 #3
0
/*---------------------------------------------------------------------------
 *
 * Computes block-oriented matrix-matrix product recursively.
 *
 * Input:
 *   double** c       - matrix product C = A * B
 *   double** a       - first factor of product
 *   double** b       - second factor of product
 *   int crow, ccol   - starting row and column of block of C
 *   int arow, acol   - starting row and column of block of A
 *   int brow, bcol   - starting row and column of block of B
 *   int l, m, n      - dims of blocks: A is l x m, B is m x n, C is l x n
 *   int N            - full row length (column dimension) of matrix B
 *   int threshold    - B blocks larger than this are partitioned
 *
 * Output:
 *   double** c       - matrix product C = A * B
 *
 * Algorithm based on one presented on page 276 of "Parallel Programming in
 * C with MPI and OpenMP", Michael J. Quinn, McGraw-Hill, 2004.
 *
 * **** NOTE ****: There is a typo in Quinn's code in the recursive call
 * to mm_rec().  The 5th parameter should be "ccol + nhalf[j]" and not
 * use "mhalf" as shown in the text.  The error only shows up when the
 * dimensions of the matrices are not uniform.
 */
void mm_rec( double** c, double** a, double** b,
             int crow, int ccol, int arow, int acol, int brow, int bcol,
             int l, int m, int n, int N, int threshold )
{
    int lhalf[3], mhalf[3], nhalf[3];
    int i, j, k;
    
    if ( m * n > threshold )
    {
        lhalf[0] = 0; lhalf[1] = l/2; lhalf[2] = l - lhalf[1];
        mhalf[0] = 0; mhalf[1] = m/2; mhalf[2] = m - mhalf[1];
        nhalf[0] = 0; nhalf[1] = n/2; nhalf[2] = n - nhalf[1];
        for ( i = 0; i < 2; i++ )
        {
            for ( j = 0; j < 2; j++ )
            {
                for ( k = 0; k < 2; k++ )
                {
                    mm_rec( c, a, b,
                            crow + lhalf[i], ccol + nhalf[j],
                            arow + lhalf[i], acol + mhalf[k],
                            brow + mhalf[k], bcol + nhalf[j],
                            lhalf[i + 1], mhalf[k + 1], nhalf[j + 1],
                            N, threshold );
                }
            }
        }
    }
    else
    {
        do_product( a, b, c,
                    arow, arow + l - 1,
                    bcol, bcol + n - 1,
                    acol, acol + m - 1 );
    }
}