/// In-place matrix product `*this *= other`.
///
/// Chooses a multiplication routine from the three dimensions involved
/// (rows of this, columns of this, columns of `other`):
///  - largest dimension below 17, or smallest dimension equal to 1:
///    `direct_multiply`;
///  - otherwise the first odd dimension, tested in the order rows, columns,
///    other-columns, selects `rr*`, `cc*` or `oc*`; bit 1 of that dimension
///    picks variant 1 (bit set) or variant 2 (bit clear);
///  - all three dimensions even: `strassen_multiply`.
///
/// @param other right-hand operand; its row count must equal this matrix's
///              column count (checked with `assert`).
/// @return whatever the selected routine returns.
zen_type& operator *= ( const zen_type & other )
{
    zen_type& self = static_cast<zen_type&>( *this );
    assert( self.col() == other.row() );

    static const size_type threshold = 17;

    const size_type rows       = self.row();
    const size_type inner      = self.col();
    const size_type other_cols = other.col();

    const size_type largest  = std::max( std::max( rows, inner ), other_cols );
    const size_type smallest = std::min( std::min( rows, inner ), other_cols );

    // Small or vector-like operands go straight to the plain routine.
    if ( ( largest < threshold ) || ( smallest == 1 ) )
    {
        return direct_multiply( other );
    }

    // Odd row count.
    if ( rows & 1 )
    {
        if ( rows & 2 )
        {
            return rr1( other );
        }
        return rr2( other );
    }

    // Odd column count.
    if ( inner & 1 )
    {
        if ( inner & 2 )
        {
            return cc1( other );
        }
        return cc2( other );
    }

    // Odd column count on the right-hand operand.
    if ( other_cols & 1 )
    {
        if ( other_cols & 2 )
        {
            return oc1( other );
        }
        return oc2( other );
    }

    // Every dimension is even.
    return strassen_multiply( other );
}
/* Function: strassen_multiply -------------------------- Internal function. Matrix multiplication through Strenssen algorithm. Calculates C = AB. Dimensions of A and B must be power of 2. Parameters: a - matrix A rA_s - row start index of A rA_e - row end index of A cA_s - column start index of A cA_e - column end index of A b - matrix B rB_s - column start index 0f B rB_e - column end index of B cB_s - column start index of B cB_e - column end index of B c - result matrix */ void strassen_multiply( double **a, int rA_s, int rA_e, int cA_s, int cA_e, double **b, int rB_s, int rB_e, int cB_s, int cB_e, double **c) { if (((cA_e - cA_s) < 1) || ((rA_e - rA_s) < 1) || ((cB_e - cB_s) < 1) || ((cA_e - cA_s + 1 < SMALL_DIM) && (rA_e - rA_s + 1 < SMALL_DIM) && (cB_e - cB_s + 1 < SMALL_DIM))) { for (int i = 0; i <= (rA_e - rA_s); ++i) { for (int j = 0; j <= (cB_e - cB_s); ++j) { c[i][j] = 0; for (int k = 0; k <= (cA_e - cA_s); ++k) { c[i][j] += a[i + rA_s][k + cA_s] * b[k + rB_s][j + cB_s]; } } } } else { // Intermediate matrix initialization double ***m = (double***)malloc(7 * sizeof(double**)); double ***c_sub = (double***)malloc(4 * sizeof(double**)); int mR = rA_e - rA_s + 1; int mC = cB_e - cB_s + 1; for (int i = 0; i < 7; ++i) { m[i] = (double**)malloc((mR / 2) * sizeof(double*)); for (int j = 0; j < mR / 2; ++j) { m[i][j] = (double*)malloc((mC / 2) * sizeof(double)); } } // Gets results of 7 intermediate matrices int rA_m = (rA_s + rA_e) / 2; int cA_m = (cA_s + cA_e) / 2; int rB_m = (rB_s + rB_e) / 2; int cB_m = (cB_s + cB_e) / 2; // Temporary pointers double **temp1; double **temp2; // Matrix m1 temp1 = matrix_sum(a, rA_s, rA_m, cA_s, cA_m, rA_m + 1, rA_e, cA_m + 1, cA_e); temp2 = matrix_sum(b, rB_s, rB_m, cB_s, cB_m, rB_m + 1, rB_e, cB_m + 1, cB_e); strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, temp2, 0, rB_m - rB_s, 0, cB_m - cB_s, m[0]); clear2D(&temp1, rA_m - rA_s + 1); clear2D(&temp2, rB_m - rB_s + 1); // Matrix m2 temp1 = matrix_sum(a, rA_m + 1, 
rA_e, cA_s, cA_m, rA_m + 1, rA_e, cA_m + 1, cA_e); strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, b, rB_s, rB_m, cB_s, cB_m, m[1]); clear2D(&temp1, rA_e - rA_m); // Matrix m3 temp1 = matrix_sub(b, rB_s, rB_m, cB_m + 1, cB_e, rB_m + 1, rB_e, cB_m + 1, cB_e); strassen_multiply(a, rA_s, rA_m, cA_s, cA_m, temp1, 0, rB_m - rB_s, 0, cB_m - cB_s, m[2]); clear2D(&temp1, rB_m - rB_s + 1); // Matrix m4 temp1 = matrix_sub(b, rB_m + 1, rB_e, cB_s, cB_m, rB_s, rB_m, cB_s, cB_m); strassen_multiply(a, rA_m + 1, rA_e, cA_m + 1, cA_e, temp1, 0, rB_m - rB_s, 0, cB_m - cB_s, m[3]); clear2D(&temp1, rB_e - rB_m); // Matrix m5 temp1 = matrix_sum(a, rA_s, rA_m, cA_s, cA_m, rA_s, rA_m, cA_m + 1, cA_e); strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, b, rB_m + 1, rB_e, cB_m + 1, cB_e, m[4]); clear2D(&temp1, rA_m - rA_s + 1); // Matrix m6 temp1 = matrix_sub(a, rA_m + 1, rA_e, cA_s, cA_m, rA_s, rA_m, cA_s, cA_m); temp2 = matrix_sum(b, rB_s, rB_m, cB_s, cB_m, rB_s, rB_m, cB_m + 1, cB_e); strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, temp2, 0, rB_m - rB_s, 0, cB_m - cB_s, m[5]); clear2D(&temp1, rA_e - rA_m); clear2D(&temp2, rB_m - rB_s + 1); // Matrix m7 temp1 = matrix_sub(a, rA_s, rA_m, cA_m + 1, cA_e, rA_m + 1, rA_e, cA_m + 1, cA_e); temp2 = matrix_sum(b, rB_m + 1, rB_e, cB_s, cB_m, rB_m + 1, rB_e, cB_m + 1, cB_e); strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, temp2, 0, rB_m - rB_s, 0, cB_m - cB_s, m[6]); clear2D(&temp1, rA_m - rA_s + 1); clear2D(&temp2, rB_e - rB_m); // Calculates all result sub-matrices temp1 = sum(m[0], m[3], mR / 2, mC / 2); temp2 = sub(temp1, m[4], mR / 2, mC / 2); c_sub[0] = sum(temp2, m[6], mR / 2, mC / 2); clear2D(&temp1, mR / 2); clear2D(&temp2, mR / 2); c_sub[1] = sum(m[2], m[4], mR / 2, mC / 2); c_sub[2] = sum(m[1], m[3], mR / 2, mC / 2); temp1 = sum(m[0], m[2], mR / 2, mC / 2); temp2 = sum(temp1, m[5], mR / 2, mC / 2); c_sub[3] = sub(temp2, m[1], mR / 2, mC / 2); clear2D(&temp1, mR / 2); clear2D(&temp2, mR / 2); // Free 
intermediate matrices for (int i = 0; i < 7; ++i) { for (int j = 0; j < mR / 2; ++j) { free(m[i][j]); } free(m[i]); } free(m); // Combine sub-matrices for (int i = 0; i < 4; ++i) { for (int j = 0; j < mR / 2; ++j) { for (int k = 0; k < mC / 2; ++k) { c[(i / 2) * mR / 2 + j][(i % 2) * mC / 2 + k] = c_sub[i][j][k]; } } } // Free sub mmatrices for (int i = 0; i < 4; ++i) { for (int j = 0; j < mR / 2; ++j) { free(c_sub[i][j]); } free(c_sub[i]); } free(c_sub); } }