zen_type& operator *= ( const zen_type & other )
    {
        // Multiplies this matrix by `other` by delegating to one of several
        // multiplication routines, chosen from the shapes of the operands.
        // The value returned is whatever the selected routine returns.
        //
        // Selection order:
        //   1. largest dimension below the threshold, or smallest dimension
        //      equal to 1                      -> direct_multiply
        //   2. odd row count of this matrix     -> rr1 (bit 1 set) / rr2
        //   3. odd column count of this matrix  -> cc1 (bit 1 set) / cc2
        //   4. odd column count of `other`      -> oc1 (bit 1 set) / oc2
        //   5. all three dimensions even        -> strassen_multiply
        zen_type& self = static_cast<zen_type&>( *this );

        // The inner dimensions must agree for the product to be defined.
        assert( self.col() == other.row() );

        const size_type lhs_rows = self.row();
        const size_type lhs_cols = self.col();
        const size_type rhs_cols = other.col();

        // Operands whose largest dimension is below this value are always
        // multiplied directly.
        static const size_type threshold = 17;

        const size_type largest  = std::max( lhs_rows, std::max( lhs_cols, rhs_cols ) );
        const size_type smallest = std::min( lhs_rows, std::min( lhs_cols, rhs_cols ) );

        if ( ( smallest == 1 ) || ( largest < threshold ) )
        {
            return direct_multiply( other );
        }

        // The first odd dimension (rows, then columns, then other's columns)
        // picks the handler family; bit 1 of that dimension picks the variant.
        if ( ( lhs_rows & 1 ) != 0 )
        {
            if ( ( lhs_rows & 2 ) != 0 )
            {
                return rr1( other );
            }
            else
            {
                return rr2( other );
            }
        }

        if ( ( lhs_cols & 1 ) != 0 )
        {
            if ( ( lhs_cols & 2 ) != 0 )
            {
                return cc1( other );
            }
            else
            {
                return cc2( other );
            }
        }

        if ( ( rhs_cols & 1 ) != 0 )
        {
            if ( ( rhs_cols & 2 ) != 0 )
            {
                return oc1( other );
            }
            else
            {
                return oc2( other );
            }
        }

        // Every dimension is even.
        return strassen_multiply( other );
    }
/*
	Function: strassen_multiply
	--------------------------
	Internal function. Matrix multiplication through Strassen's algorithm.
	Calculates C = AB for the sub-blocks of A and B selected by the index
	ranges below (all ranges are inclusive). The result is written to c
	starting at c[0][0], so c must provide at least (rA_e - rA_s + 1) rows
	of (cB_e - cB_s + 1) columns.

	The row range of B is expected to have the same length as the column
	range of A.

	The Strassen recursion is applied only while all three dimensions
	(rows of A, columns of A, columns of B) are even, at least 2, and not all
	below SMALL_DIM. Every other case, including any odd dimension, is
	computed with the direct triple loop, so the halves used by the
	recursion always have equal size. Power-of-two dimensions therefore
	recurse all the way down to the SMALL_DIM cutoff.

	matrix_sum, matrix_sub, sum and sub are defined elsewhere; their results
	are treated here as newly allocated matrices owned by this function and
	are released with clear2D or free.

	NOTE: results of malloc are not checked.

	Parameters:
	a - matrix A
	rA_s - row start index of A
	rA_e - row end index of A
	cA_s - column start index of A
	cA_e - column end index of A
	b - matrix B
	rB_s - row start index of B
	rB_e - row end index of B
	cB_s - column start index of B
	cB_e - column end index of B
	c - result matrix
*/
void strassen_multiply(
	double **a, int rA_s, int rA_e, int cA_s, int cA_e,
	double **b, int rB_s, int rB_e, int cB_s, int cB_e, double **c) {

	// Shape of the product: (mR x mK) * (mK x mC) -> (mR x mC)
	const int mR = rA_e - rA_s + 1;
	const int mK = cA_e - cA_s + 1;
	const int mC = cB_e - cB_s + 1;

	// A dimension of 1 (or an empty range) cannot be halved.
	const int degenerate = (mR < 2) || (mK < 2) || (mC < 2);
	// Small enough that the recursion overhead is not worth it.
	const int below_cutoff =
		(mR < SMALL_DIM) && (mK < SMALL_DIM) && (mC < SMALL_DIM);
	// An odd dimension would split into halves of different size, which the
	// quadrant sums and the (mR / 2) x (mC / 2) buffers below cannot handle.
	const int has_odd_dim = ((mR | mK | mC) & 1) != 0;

	if (degenerate || below_cutoff || has_odd_dim) {
		// Direct multiplication
		for (int i = 0; i < mR; ++i) {
			for (int j = 0; j < mC; ++j) {
				c[i][j] = 0;
				for (int k = 0; k < mK; ++k) {
					c[i][j] += a[i + rA_s][k + cA_s] * b[k + rB_s][j + cB_s];
				}
			}
		}
		return;
	}

	// From here on every dimension is even, so each quadrant is
	// halfR x (mK / 2) for A, (mK / 2) x halfC for B and halfR x halfC for C.
	const int halfR = mR / 2;
	const int halfC = mC / 2;

	// Intermediate matrix initialization (m[0]..m[6] hold M1..M7)
	double **m[7];
	for (int i = 0; i < 7; ++i) {
		m[i] = (double**)malloc(halfR * sizeof(double*));
		for (int j = 0; j < halfR; ++j) {
			m[i][j] = (double*)malloc(halfC * sizeof(double));
		}
	}

	// Last index of the first half of every range
	int rA_m = (rA_s + rA_e) / 2;
	int cA_m = (cA_s + cA_e) / 2;
	int rB_m = (rB_s + rB_e) / 2;
	int cB_m = (cB_s + cB_e) / 2;
	// Temporary pointers
	double **temp1;
	double **temp2;

	// Matrix m1 = (A11 + A22)(B11 + B22)
	temp1 = matrix_sum(a, rA_s, rA_m, cA_s, cA_m, rA_m + 1, rA_e,
		cA_m + 1, cA_e);
	temp2 = matrix_sum(b, rB_s, rB_m, cB_s, cB_m, rB_m + 1, rB_e,
		cB_m + 1, cB_e);
	strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, temp2,
		0, rB_m - rB_s, 0, cB_m - cB_s, m[0]);
	clear2D(&temp1, rA_m - rA_s + 1);
	clear2D(&temp2, rB_m - rB_s + 1);
	// Matrix m2 = (A21 + A22) B11
	temp1 = matrix_sum(a, rA_m + 1, rA_e, cA_s, cA_m, rA_m + 1,
		rA_e, cA_m + 1, cA_e);
	strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s,
		b, rB_s, rB_m, cB_s, cB_m, m[1]);
	clear2D(&temp1, rA_e - rA_m);
	// Matrix m3 = A11 (B12 - B22)
	temp1 = matrix_sub(b, rB_s, rB_m, cB_m + 1, cB_e, rB_m + 1,
		rB_e, cB_m + 1, cB_e);
	strassen_multiply(a, rA_s, rA_m, cA_s, cA_m, temp1, 0,
		rB_m - rB_s, 0, cB_m - cB_s, m[2]);
	clear2D(&temp1, rB_m - rB_s + 1);
	// Matrix m4 = A22 (B21 - B11)
	temp1 = matrix_sub(b, rB_m + 1, rB_e, cB_s, cB_m, rB_s, rB_m,
		cB_s, cB_m);
	strassen_multiply(a, rA_m + 1, rA_e, cA_m + 1, cA_e, temp1,
		0, rB_m - rB_s, 0, cB_m - cB_s, m[3]);
	clear2D(&temp1, rB_e - rB_m);
	// Matrix m5 = (A11 + A12) B22
	temp1 = matrix_sum(a, rA_s, rA_m, cA_s, cA_m, rA_s, rA_m,
		cA_m + 1, cA_e);
	strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, b,
		rB_m + 1, rB_e, cB_m + 1, cB_e, m[4]);
	clear2D(&temp1, rA_m - rA_s + 1);
	// Matrix m6 = (A21 - A11)(B11 + B12)
	temp1 = matrix_sub(a, rA_m + 1, rA_e, cA_s, cA_m, rA_s, rA_m,
		cA_s, cA_m);
	temp2 = matrix_sum(b, rB_s, rB_m, cB_s, cB_m, rB_s, rB_m,
		cB_m + 1, cB_e);
	strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s,
		temp2, 0, rB_m - rB_s, 0, cB_m - cB_s, m[5]);
	clear2D(&temp1, rA_e - rA_m);
	clear2D(&temp2, rB_m - rB_s + 1);
	// Matrix m7 = (A12 - A22)(B21 + B22)
	temp1 = matrix_sub(a, rA_s, rA_m, cA_m + 1, cA_e, rA_m + 1,
		rA_e, cA_m + 1, cA_e);
	temp2 = matrix_sum(b, rB_m + 1, rB_e, cB_s, cB_m, rB_m + 1,
		rB_e, cB_m + 1, cB_e);
	strassen_multiply(temp1, 0, rA_m - rA_s, 0, cA_m - cA_s, temp2,
		0, rB_m - rB_s, 0, cB_m - cB_s, m[6]);
	clear2D(&temp1, rA_m - rA_s + 1);
	clear2D(&temp2, rB_e - rB_m);

	// Calculates all result sub-matrices
	// (c_sub[0..3] hold C11, C12, C21, C22)
	double **c_sub[4];
	// C11 = M1 + M4 - M5 + M7
	temp1 = sum(m[0], m[3], halfR, halfC);
	temp2 = sub(temp1, m[4], halfR, halfC);
	c_sub[0] = sum(temp2, m[6], halfR, halfC);
	clear2D(&temp1, halfR);
	clear2D(&temp2, halfR);
	// C12 = M3 + M5
	c_sub[1] = sum(m[2], m[4], halfR, halfC);
	// C21 = M2 + M4
	c_sub[2] = sum(m[1], m[3], halfR, halfC);
	// C22 = M1 + M3 + M6 - M2
	temp1 = sum(m[0], m[2], halfR, halfC);
	temp2 = sum(temp1, m[5], halfR, halfC);
	c_sub[3] = sub(temp2, m[1], halfR, halfC);
	clear2D(&temp1, halfR);
	clear2D(&temp2, halfR);

	// Free intermediate matrices
	for (int i = 0; i < 7; ++i) {
		for (int j = 0; j < halfR; ++j) {
			free(m[i][j]);
		}
		free(m[i]);
	}

	// Combine sub-matrices: quadrant q goes to block row q / 2 and
	// block column q % 2 of c
	for (int q = 0; q < 4; ++q) {
		const int row_off = (q / 2) * halfR;
		const int col_off = (q % 2) * halfC;
		for (int j = 0; j < halfR; ++j) {
			for (int k = 0; k < halfC; ++k) {
				c[row_off + j][col_off + k] = c_sub[q][j][k];
			}
		}
	}

	// Free sub-matrices
	for (int q = 0; q < 4; ++q) {
		for (int j = 0; j < halfR; ++j) {
			free(c_sub[q][j]);
		}
		free(c_sub[q]);
	}
}