/**
 * Matrix product: returns M1 * M2 as a new matrix with M1.rows() rows and
 * M2.cols() columns.
 *
 * Preconditions (checked with Q_ASSERT only): both operands report the same
 * type(), both have non-null constData() and non-zero size(), and
 * M1.cols() == M2.rows().
 *
 * When the operands report CV_64F the product is delegated to IPP's
 * ippmMul_mm_64f. For every other type, or if the IPP call does not return
 * ippStsNoErr, the product is accumulated element by element via get()/set().
 *
 * NOTE(review): T is assumed to be introduced by a template header or an
 * enclosing class template that precedes this definition - confirm.
 *
 * @param M1 left operand
 * @param M2 right operand
 * @return the product matrix
 */
Matrix<T> operator*( const Matrix<T>& M1, const Matrix<T>& M2 )
{
    Q_ASSERT(M1.type() == M2.type());
    Q_ASSERT(M1.constData() && M2.constData() && M1.size() && M2.size());
    Q_ASSERT(M1.cols() == M2.rows());

    Matrix<T> dst(M1.rows(), M2.cols());

    // Set once the IPP path has produced a valid result.
    bool computedByIpp = false;

    if (M1.type() == CV_64F)
    {
        // IPP takes strides in bytes: stride1 steps from one row to the next,
        // stride2 from one element to the next within a row.
        const int elementStride = static_cast<int>(sizeof(T));
        const int src1RowStride = static_cast<int>(M1.cols() * sizeof(T));
        const int src2RowStride = static_cast<int>(M2.cols() * sizeof(T));
        const int dstRowStride = static_cast<int>(dst.cols() * sizeof(T));

        const Ipp64f* pSrc1 = reinterpret_cast<const Ipp64f*>(M1.constData());
        const Ipp64f* pSrc2 = reinterpret_cast<const Ipp64f*>(M2.constData());
        Ipp64f* pDst = reinterpret_cast<Ipp64f*>(dst.data());

        const IppStatus status = ippmMul_mm_64f(
            pSrc1, src1RowStride, elementStride, M1.cols(), M1.rows(),
            pSrc2, src2RowStride, elementStride, M2.cols(), M2.rows(),
            pDst, dstRowStride, elementStride);

        Q_ASSERT(status == ippStsNoErr);

        // Q_ASSERT may be compiled out; never hand back a result IPP did not
        // actually compute - fall through to the generic loop instead.
        computedByIpp = (status == ippStsNoErr);
    }

    if (!computedByIpp)
    {
        for (int rowIndex = 0; rowIndex < dst.rows(); ++rowIndex)
        {
            for (int colIndex = 0; colIndex < dst.cols(); ++colIndex)
            {
                // Value-initialize the accumulator (zero for arithmetic T)
                // rather than memset-ing raw bytes, which is only valid for
                // trivially copyable types.
                T sumValue = T();
                for (int sumIndex = 0; sumIndex < M1.cols(); ++sumIndex)
                {
                    sumValue += M1.get(rowIndex, sumIndex) * M2.get(sumIndex, colIndex);
                }
                dst.set(sumValue, rowIndex, colIndex);
            }
        }
    }

    return dst;
}
/**
 * Multiplies two double-precision matrices through IPP's ippmMul_mm_64f and
 * writes the product into R.
 *
 * A is handed to IPP with width An and height Am, B with width Bn and height
 * An; R is written with a row stride of Bn elements. All three buffers are
 * described with an element stride of 8 bytes and a row stride of
 * (column count * 8) bytes.
 *
 * The IppStatus returned by ippmMul_mm_64f is discarded.
 *
 * @param Am height passed for A
 * @param An width passed for A and height passed for B
 * @param Bn width passed for B; also used for the row stride of R
 * @param A  left operand data
 * @param B  right operand data
 * @param R  destination buffer for the product
 */
void matrix_product_ipp(int Am, int An, int Bn, const double *A, const double *B, double *R)
{
    // Byte distance between consecutive elements of a row.
    const int elementStride = 8;

    // Byte distance between consecutive rows of each buffer.
    const int aRowStride = An * elementStride;
    const int bRowStride = Bn * elementStride;
    const int rRowStride = Bn * elementStride;

    ippmMul_mm_64f(A, aRowStride, elementStride, An, Am,
                   B, bRowStride, elementStride, Bn, An,
                   R, rRowStride, elementStride);
}