Example #1
0
/*
  Recursive subroutine to compute the product of two
  submatrices of A and B and store the result in C.

  mf, ml : first and last+1 i index (rows of A and C)
  nf, nl : first and last+1 j index (columns of B and C)
  pf, pl : first and last+1 k index (columns of A / rows of B)

  Each index range is halved, giving eight sub-products. The two
  sub-products that update the same quadrant of C are run back to back
  inside one task, so the four tasks write to disjoint quadrants of C.

  The tasks can only run concurrently when the caller has an active
  OpenMP parallel region; otherwise they execute in program order.
*/

#define GRAIN  32768 /* product size below which matmultleaf is used */

void matmultrec(int mf, int ml, int nf, int nl, int pf, int pl, double **A, double **B, double **C)
{
	// Evaluate the sub-problem volume in 64-bit arithmetic: the plain int
	// product overflows (undefined behaviour) once it reaches 2^31,
	// e.g. for a 2048 x 2048 x 2048 multiplication.
	const long long volume =
		static_cast<long long>(ml - mf) * (nl - nf) * (pl - pf);

	if (volume < GRAIN) {
		matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C);
		return;
	}

	// Split points of the three index ranges.
	const int mm = mf + (ml - mf) / 2;
	const int nm = nf + (nl - nf) / 2;
	const int pm = pf + (pl - pf) / 2;

#pragma omp task
	{
		// C00 += A00 * B00
		matmultrec(mf, mm, nf, nm, pf, pm, A, B, C);
		// C00 += A01 * B10
		matmultrec(mf, mm, nf, nm, pm, pl, A, B, C);
	}
#pragma omp task
	{
		// C01 += A00 * B01
		matmultrec(mf, mm, nm, nl, pf, pm, A, B, C);
		// C01 += A01 * B11
		matmultrec(mf, mm, nm, nl, pm, pl, A, B, C);
	}
#pragma omp task
	{
		// C10 += A10 * B00
		matmultrec(mm, ml, nf, nm, pf, pm, A, B, C);
		// C10 += A11 * B10
		matmultrec(mm, ml, nf, nm, pm, pl, A, B, C);
	}
#pragma omp task
	{
		// C11 += A10 * B01
		matmultrec(mm, ml, nm, nl, pf, pm, A, B, C);
		// C11 += A11 * B11
		matmultrec(mm, ml, nm, nl, pm, pl, A, B, C);
	}
	// All four quadrants must be complete before this call returns.
#pragma omp taskwait
}
Example #2
0
/*
  Multiplies A by B into C using the Winograd variant of Strassen's
  algorithm (seven recursive half-size products per level).

  C      : result, treated as ml x nl
  A      : left operand, treated as ml x pl
  B      : right operand, treated as pl x nl
  ml, pl, nl : matrix extents

  Problems whose volume ml*nl*pl is below THRESHOLD are handed to
  matmultleaf. So are problems with an odd extent: the quadrant split
  uses ml/2, pl/2 and nl/2, so recursing on an odd extent would silently
  drop the last row/column of the operands and leave the last row/column
  of C unwritten.
  NOTE(review): this assumes matmultleaf accepts arbitrary extents and
  produces the same kind of result (C = A*B) as the recursive branch.

  The tasks can only run concurrently when the caller has an active
  OpenMP parallel region; otherwise they execute in program order.
*/
void strassenMMult(double **C, double **A, double **B, int ml, int pl, int nl)
{
   const bool oddExtent = (ml % 2 != 0) || (pl % 2 != 0) || (nl % 2 != 0);

   if (oddExtent ||
       static_cast<float>(ml) * static_cast<float>(nl) * static_cast<float>(pl) < THRESHOLD)
      matmultleaf(ml, nl, pl, A, B, C);

   else {
      int m2 = ml/2;
      int n2 = nl/2;
      int p2 = pl/2;

      // S1..S4 are sums/differences of A quadrants (m2 x p2),
      // S5..S8 are sums/differences of B quadrants (p2 x n2).
      double **S1 = Allocate2DArray< double >(m2, p2);
      double **S2 = Allocate2DArray< double >(m2, p2);
      double **S3 = Allocate2DArray< double >(m2, p2);
      double **S4 = Allocate2DArray< double >(m2, p2);
      double **S5 = Allocate2DArray< double >(p2, n2);
      double **S6 = Allocate2DArray< double >(p2, n2);
      double **S7 = Allocate2DArray< double >(p2, n2);
      double **S8 = Allocate2DArray< double >(p2, n2);

      // M1..M7 hold the seven recursive products (m2 x n2).
      double **M1 = Allocate2DArray< double >(m2, n2);
      double **M2 = Allocate2DArray< double >(m2, n2);
      double **M3 = Allocate2DArray< double >(m2, n2);
      double **M4 = Allocate2DArray< double >(m2, n2);
      double **M5 = Allocate2DArray< double >(m2, n2);
      double **M6 = Allocate2DArray< double >(m2, n2);
      double **M7 = Allocate2DArray< double >(m2, n2);

      // T1, T2 are partial sums shared by several quadrants of C.
      double **T1 = Allocate2DArray< double >(m2, n2);
      double **T2 = Allocate2DArray< double >(m2, n2);

      // Row-pointer tables for the quadrants of A, B and C. Only the
      // tables are allocated here; they are released with delete[] below.
      double **A11 = new double*[m2];
      double **A12 = new double*[m2];
      double **A21 = new double*[m2];
      double **A22 = new double*[m2];

      double **B11 = new double*[p2];
      double **B12 = new double*[p2];
      double **B21 = new double*[p2];
      double **B22 = new double*[p2];

      double **C11 = new double*[m2];
      double **C12 = new double*[m2];
      double **C21 = new double*[m2];
      double **C22 = new double*[m2];

      // Arguments: (quadrant table, row count, source, row offset, column offset).
      copyQtrMatrix(A11, m2, A,  0,  0);
      copyQtrMatrix(A12, m2, A,  0, p2);
      copyQtrMatrix(A21, m2, A, m2,  0);
      copyQtrMatrix(A22, m2, A, m2, p2);

      copyQtrMatrix(B11, p2, B,  0,  0);
      copyQtrMatrix(B12, p2, B,  0, n2);
      copyQtrMatrix(B21, p2, B, p2,  0);
      copyQtrMatrix(B22, p2, B, p2, n2);

      copyQtrMatrix(C11, m2, C,  0,  0);
      copyQtrMatrix(C12, m2, C,  0, n2);
      copyQtrMatrix(C21, m2, C, m2,  0);
      copyQtrMatrix(C22, m2, C, m2, n2);

      // Phase 1: the eight operand sums. Each task holds one dependency
      // chain, so the three tasks write disjoint S matrices.
#pragma omp task
{
      // S1 = A21 + A22
      AddMatBlocks(S1, m2, p2, A21, A22);

      // S2 = S1 - A11
      SubMatBlocks(S2, m2, p2, S1, A11);

      // S4 = A12 - S2
      SubMatBlocks(S4, m2, p2, A12, S2);
}
#pragma omp task
{
      // S3 = A11 - A21
      SubMatBlocks(S3, m2, p2, A11, A21);

      // S7 = B22 - B12
      SubMatBlocks(S7, p2, n2, B22, B12);
}
#pragma omp task
{
      // S5 = B12 - B11
      SubMatBlocks(S5, p2, n2, B12, B11);

      // S6 = B22 - S5
      SubMatBlocks(S6, p2, n2, B22, S5);

      // S8 = S6 - B21
      SubMatBlocks(S8, p2, n2, S6, B21);
}
      // Every S matrix is an input to phase 2.
#pragma omp taskwait

      // Phase 2: the seven recursive products, split over two tasks.
      // T1 and T2 only need M1, M2 and M4, so they are formed in the
      // task that computes those products.
#pragma omp task
{
      // M1 = S2 * S6
      strassenMMult(M1, S2,  S6,  m2, p2, n2);

      // M2 = A11 * B11
      strassenMMult(M2, A11, B11, m2, p2, n2);

      // M4 = S3 * S7
      strassenMMult(M4, S3,  S7,  m2, p2, n2);

      // T1 = M1 + M2
      AddMatBlocks(T1, m2, n2, M1, M2);

      // T2 = T1 + M4
      AddMatBlocks(T2, m2, n2, T1, M4);
}
#pragma omp task
{
      // M3 = A12 * B21
      strassenMMult(M3, A12, B21, m2, p2, n2);

      // M5 = S1 * S5
      strassenMMult(M5, S1,  S5,  m2, p2, n2);

      // M6 = S4 * B22
      strassenMMult(M6, S4,  B22, m2, p2, n2);

      // M7 = A22 * S8
      strassenMMult(M7, A22, S8,  m2, p2, n2);
}
      // All products must be complete before they are combined and freed.
#pragma omp taskwait


      // Phase 3: assemble the quadrants of C (written through the
      // row-pointer tables, i.e. directly into C).
      // C11 = M2 + M3
      // C12 = T1 + M5 + M6
      // C21 = T2 - M7
      // C22 = T2 + M5

//#pragma omp for
      for (int i = 0; i < m2; ++i)
         for (int j = 0; j < n2; ++j) {
            C11[i][j] = M2[i][j] + M3[i][j];
            C12[i][j] = T1[i][j] + M5[i][j] + M6[i][j];
            C21[i][j] = T2[i][j] - M7[i][j];
            C22[i][j] = T2[i][j] + M5[i][j];
         }

      Free2DArray< double >(S1);
      Free2DArray< double >(S2);
      Free2DArray< double >(S3);
      Free2DArray< double >(S4);
      Free2DArray< double >(S5);
      Free2DArray< double >(S6);
      Free2DArray< double >(S7);
      Free2DArray< double >(S8);

      Free2DArray< double >(M1);
      Free2DArray< double >(M2);
      Free2DArray< double >(M3);
      Free2DArray< double >(M4);
      Free2DArray< double >(M5);
      Free2DArray< double >(M6);
      Free2DArray< double >(M7);

      Free2DArray< double >(T1);
      Free2DArray< double >(T2);

      delete[] A11; delete[] A12; delete[] A21; delete[] A22;
      delete[] B11; delete[] B12; delete[] B21; delete[] B22;
      delete[] C11; delete[] C12; delete[] C21; delete[] C22;
   }
}
/*
  Multiplies a sub-block of A by a sub-block of B into a sub-block of C
  using Strassen's algorithm (seven recursive half-size products).

  mf, ml : first and last+1 row index into A and C
  nf, nl : first and last+1 column index into B and C
  pf, pl : first and last+1 column index into A / row index into B

  Problems whose volume is below GRAIN are handed to matmultleaf. So are
  problems with an odd extent: the quadrant split halves each extent with
  integer division, so recursing on an odd extent would silently drop the
  last row/column of the operands and leave part of C unwritten.
  NOTE(review): this assumes matmultleaf accepts arbitrary extents and
  produces the same kind of result as the recursive branch.

  NOTE(review): every recursion level opens its own parallel region from
  inside a section of the parent's region, so the effective thread count
  depends on the OpenMP nested-parallelism settings.
*/
void strassenMMult(int mf, int ml, int nf, int nl, int pf, int pl, int **A, int **B, int **C)
{
	const int rows  = ml - mf;
	const int cols  = nl - nf;
	const int depth = pl - pf;
	const bool oddExtent = (rows % 2 != 0) || (cols % 2 != 0) || (depth % 2 != 0);

	// 64-bit product so the volume cannot overflow on platforms where
	// long is only 32 bits wide.
	if (oddExtent || static_cast<long long>(rows) * cols * depth < GRAIN)
	      matmultleaf(mf, ml, nf, nl, pf, pl, A, B, C); 

	else {
		int m2 = rows/2;
		int n2 = cols/2;
		int p2 = depth/2;

		// M1..M7 hold the seven recursive products (m2 x n2).
		int **M1 = Allocate2DArray< int >(m2, n2);
		int **M2 = Allocate2DArray< int >(m2, n2);
		int **M3 = Allocate2DArray< int >(m2, n2);
		int **M4 = Allocate2DArray< int >(m2, n2);
		int **M5 = Allocate2DArray< int >(m2, n2);
		int **M6 = Allocate2DArray< int >(m2, n2);
		int **M7 = Allocate2DArray< int >(m2, n2);
		
		// Row-pointer tables for the quadrants of A, B and C. Only the
		// tables are allocated here; they are released with delete[] below.
		int **A11 = new int*[m2];
		int **A12 = new int*[m2];
		int **A21 = new int*[m2];
		int **A22 = new int*[m2];
		
		int **B11 = new int*[p2];
		int **B12 = new int*[p2];
		int **B21 = new int*[p2];
		int **B22 = new int*[p2];
		
		int **C11 = new int*[m2];
		int **C12 = new int*[m2];
		int **C21 = new int*[m2];
		int **C22 = new int*[m2];
		
		// Scratch operands: tAMk / tBMk is the left / right factor of Mk
		// when that factor is a sum or difference of quadrants.
		int **tAM1 = Allocate2DArray< int >(m2, p2);
		int **tBM1 = Allocate2DArray< int >(p2, n2);
		int **tAM2 = Allocate2DArray< int >(m2, p2);
		int **tBM3 = Allocate2DArray< int >(p2, n2);
		int **tBM4 = Allocate2DArray< int >(p2, n2);
		int **tAM5 = Allocate2DArray< int >(m2, p2);
		int **tAM6 = Allocate2DArray< int >(m2, p2);
		int **tBM6 = Allocate2DArray< int >(p2, n2);
		int **tAM7 = Allocate2DArray< int >(m2, p2);
		int **tBM7 = Allocate2DArray< int >(p2, n2);
		
		#pragma omp parallel
		{
		// Phase 1: set up the quadrant tables. Arguments are
		// (quadrant table, row count, source, row offset, column offset).
		// The second-half offsets are relative to the sub-block origin
		// (mf/nf/pf); the bare half size is only right when the origin is 0.
		#pragma omp sections
		{
		#pragma omp section
		{
		copyQtrMatrix(A11, m2, A, mf,      pf);
		copyQtrMatrix(A12, m2, A, mf,      pf + p2);
		copyQtrMatrix(A21, m2, A, mf + m2, pf);
		copyQtrMatrix(A22, m2, A, mf + m2, pf + p2);
		}
		#pragma omp section
		{
		copyQtrMatrix(B11, p2, B, pf,      nf);
		copyQtrMatrix(B12, p2, B, pf,      nf + n2);
		copyQtrMatrix(B21, p2, B, pf + p2, nf);
		copyQtrMatrix(B22, p2, B, pf + p2, nf + n2);
		}
		#pragma omp section
		{
		copyQtrMatrix(C11, m2, C, mf,      nf);
		copyQtrMatrix(C12, m2, C, mf,      nf + n2);
		copyQtrMatrix(C21, m2, C, mf + m2, nf);
		copyQtrMatrix(C22, m2, C, mf + m2, nf + n2);
		}
		}
		// The sections construct ends with an implicit barrier, so no
		// explicit barrier is needed before the quadrant tables are used.
		
		// Phase 2: the seven products, one per section. The operands are
		// standalone half-size matrices, hence the 0-based index ranges.
		#pragma omp sections
		{
		#pragma omp section
		{	
	// M1 = (A11 + A22)*(B11 + B22)
		AddMatBlocks(tAM1, m2, p2, A11, A22);
		AddMatBlocks(tBM1, p2, n2, B11, B22);
		strassenMMult(0, m2, 0, n2, 0, p2, tAM1, tBM1, M1);
		}
		#pragma omp section
		{
	//M2 = (A21 + A22)*B11
		AddMatBlocks(tAM2, m2, p2, A21, A22);
		strassenMMult(0, m2, 0, n2, 0, p2, tAM2, B11, M2);
		}
		#pragma omp section
		{
	//M3 = A11*(B12 - B22)
		SubMatBlocks(tBM3, p2, n2, B12, B22);
		strassenMMult(0, m2, 0, n2, 0, p2, A11, tBM3, M3);
		}
		#pragma omp section
		{
	//M4 = A22*(B21 - B11)
		SubMatBlocks(tBM4, p2, n2, B21, B11);
		strassenMMult(0, m2, 0, n2, 0, p2, A22, tBM4, M4);
		}
		#pragma omp section
		{
	//M5 = (A11 + A12)*B22
		AddMatBlocks(tAM5, m2, p2, A11, A12);
		strassenMMult(0, m2, 0, n2, 0, p2, tAM5, B22, M5);
		}
		#pragma omp section
		{
	//M6 = (A21 - A11)*(B11 + B12)
		SubMatBlocks(tAM6, m2, p2, A21, A11);
		AddMatBlocks(tBM6, p2, n2, B11, B12);
		strassenMMult(0, m2, 0, n2, 0, p2, tAM6, tBM6, M6);
		}
		#pragma omp section
		{
	//M7 = (A12 - A22)*(B21 + B22)
		SubMatBlocks(tAM7, m2, p2, A12, A22);
		AddMatBlocks(tBM7, p2, n2, B21, B22);
		strassenMMult(0, m2, 0, n2, 0, p2, tAM7, tBM7, M7);
		}
		}
		
		// Phase 3: assemble the quadrants of C. This is a work-sharing
		// loop bound to the enclosing parallel region, so each row is
		// handled by exactly one thread. A nested "parallel for" here would
		// make every thread of the team run the whole loop and race on the
		// same elements of C.
		#pragma omp for
		for (int i = 0; i < m2; i++)
			for (int j = 0; j < n2; j++) {
				C11[i][j] = M1[i][j] + M4[i][j] - M5[i][j] + M7[i][j];
				C12[i][j] = M3[i][j] + M5[i][j];
				C21[i][j] = M2[i][j] + M4[i][j];
				C22[i][j] = M1[i][j] - M2[i][j] + M3[i][j] + M6[i][j];
			}
		// The implicit barrier at the end of the loop guarantees nothing
		// is freed while another thread is still reading it.
		
		// Phase 4: release the temporaries.
		#pragma omp sections
		{
		#pragma omp section
		{
		Free2DArray< int >(M1);
		Free2DArray< int >(M2);
		Free2DArray< int >(M3);
		Free2DArray< int >(M4);
		Free2DArray< int >(M5);
		Free2DArray< int >(M6);
		Free2DArray< int >(M7);
		}
		#pragma omp section
		{
		delete[] A11; delete[] A12; delete[] A21; delete[] A22;
		delete[] B11; delete[] B12; delete[] B21; delete[] B22;
		delete[] C11; delete[] C12; delete[] C21; delete[] C22;
		}
		#pragma omp section
		{
		Free2DArray< int >(tAM1);
		Free2DArray< int >(tBM1);
		Free2DArray< int >(tAM2);
		Free2DArray< int >(tBM3);
		Free2DArray< int >(tBM4);
		Free2DArray< int >(tAM5);
		Free2DArray< int >(tAM6);
		Free2DArray< int >(tBM6);
		Free2DArray< int >(tAM7);
		Free2DArray< int >(tBM7);
		}
		}
		}
	}
}