/*
 * MEX gateway for UpDyadLo.
 *
 * Right-hand sides: Sig_IN  - signal whose length must be a power of two,
 *                   LPF_IN  - filter coefficients (any shape; all elements
 *                             are used, in storage order).
 * Left-hand side:   LP_OUT  - real double array, 1 x 2n for a 1 x n input
 *                             and 2m x 1 otherwise.
 *
 * The numerical work is delegated to uplo().
 *
 * NOTE(review): when the signal has more than one row AND more than one
 * column, the output is still sized 2m x 1 and uplo() is given length m, so
 * presumably only the first column is processed - confirm whether matrix
 * input should be rejected instead.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    DOUBLE *lpp, *lpf, *sig;
    size_t m, n, out_rows, out_cols;
    int nr, lenfil;

    /* Check for proper number of arguments */
    if (nrhs != 2) {
        mexErrMsgTxt("UpDyadLo requires two input arguments.");
    } else if (nlhs != 1) {
        mexErrMsgTxt("UpDyadLo requires one output argument.");
    }

    /*
     * mxGetPr() is only meaningful for full double arrays.  For any other
     * class, or for sparse storage, the buffer does not hold one double per
     * element and uplo() would read beyond it.
     */
    if (!mxIsDouble(Sig_IN) || mxIsSparse(Sig_IN)) {
        mexErrMsgTxt("UpDyadLo requires a full double-precision signal.");
    }
    if (!mxIsDouble(LPF_IN) || mxIsSparse(LPF_IN)) {
        mexErrMsgTxt("UpDyadLo requires a full double-precision filter.");
    }

    /* Check the dimensions of signal. signal can be n X 1 or 1 X n. */
    m = mxGetM(Sig_IN);
    n = mxGetN(Sig_IN);
    if (m == 1) {
        nr = (int) n;
        out_rows = 1;
        out_cols = 2 * n;
    } else {
        nr = (int) m;
        out_rows = 2 * m;
        out_cols = 1;
    }

    /*
     * Dyadic-length test: a positive integer is a power of two exactly when
     * it has a single bit set.  This accepts and rejects the same lengths as
     * repeatedly doubling a counter, but cannot overflow.
     */
    if (nr < 1 || (nr & (nr - 1)) != 0) {
        mexErrMsgTxt("UpDyadLo requires dyadic length");
    }

    /* Create a matrix for the return argument */
    LP_OUT = mxCreateDoubleMatrix(out_rows, out_cols, mxREAL);

    /* Assign pointers to the various parameters */
    lpp = mxGetPr(LP_OUT);
    sig = mxGetPr(Sig_IN);
    lpf = mxGetPr(LPF_IN);
    /* The filter length is the total element count, whatever its shape. */
    lenfil = (int) (mxGetM(LPF_IN) * mxGetN(LPF_IN));

    /* Do the actual computations in a subroutine */
    uplo(sig, nr, lpf, lenfil, lpp);
}
/** * @brief Perform inverse DWT (periodic boundaries) on 3D data. * * @param wc Wavelet presentation of 3D data. * @param img Reconstructed signal. */ void idpwt3 (const Matrix <T> & wc, Matrix <T> & img) { // assign dwt to result image img = wc; # pragma omp parallel default (shared) num_threads (_num_threads) { T * wcplo, * wcphi, * templo, * temphi, * temptop, * tmp; size_t stride; int sl1 = _sl1_scale, sl2 = _sl2_scale, sl3 = _sl3_scale; const int t_num = omp_get_thread_num (); // loop over levels of backwards DWT for (int j = _min_level; j < _max_level; j++) { // update stride stride = 6 * sl3 * t_num; tmp = & _temp [stride]; templo = & _temp [2 * sl3 + stride]; temphi = & _temp [3 * sl3 + stride]; temptop = & _temp [4 * sl3 + stride]; # pragma omp for schedule (OMP_SCHEDULE) // loop over lines along third dimension ('third') of result image for (int c1_loc = 0; c1_loc < 2 * sl1 * 2 * sl2; c1_loc++) { int c1_glob = (c1_loc % (2 * sl1)) + (c1_loc / (2 * sl1)) * _sl1; // copy lowpass part of current line to temporary memory unpackdouble (& img [c1_glob], sl3, _ld12, 0, templo); // copy highpass part of current line to temporary memory unpackdouble (& img [c1_glob + sl3 * _ld12], sl3, _ld12, 0, temphi); // perform lowpass reconstruction uplo (templo, sl3, tmp); // perform highpass reconstruction uphi (temphi, sl3, temptop); // fusion of reconstruction parts adddouble (tmp, temptop, sl3 * 2, tmp); // write back reconstructed line packdouble (tmp, sl3 * 2, _ld12, 0, & img [c1_glob]); } // loop over lines along third dimension of result image // update stride stride = 6 * sl2 * t_num; tmp = & _temp [stride]; templo = & _temp [2 * sl2 + stride]; temphi = & _temp [3 * sl2 + stride]; temptop = & _temp [4 * sl2 + stride]; # pragma omp for schedule (OMP_SCHEDULE) // loop over lines along second dimension ('rows') of result image for (int c1_loc = 0; c1_loc < 2 * sl1 * 2 * sl3; c1_loc++) { int c1_glob = (c1_loc / (2 * sl1)) * _sl1 * _sl2; // copy lowpass part of current 
line to temporary memory unpackdouble (& img [c1_glob], sl2, _sl1, c1_loc % (2 * sl1), templo); // copy highpass part of current line to temporary memory unpackdouble (& img [c1_glob + sl2 * _sl1], sl2, _sl1, c1_loc % (2 * sl1), temphi); // perform lowpass reconstruction uplo (templo, sl2, tmp); // perform highpass reconstruction uphi (temphi, sl2, temptop); // fusion of reconstruction parts adddouble (tmp, temptop, sl2 * 2, tmp); // write back reconstructed line packdouble (tmp, sl2 * 2, _sl1, c1_loc % (2 * sl1), & img [c1_glob]); } // loop over lines along second dimension of result image // update stride stride = 5 * sl1 * t_num; tmp = & _temp [stride]; templo = & _temp [ sl1 + stride]; temphi = & _temp [3 * sl1 + stride]; # pragma omp for schedule (OMP_SCHEDULE) // loop over lines along first dimension ('columns') of result image for (int c2_loc = 0; c2_loc < 2 * sl2 * 2 * sl3; c2_loc++) { int c2_glob = (c2_loc / (2 * sl2)) * _sl2 * _sl1 + (c2_loc % (2 * sl2)) * _sl1; // assign address of current line's lowpass part wcplo = & img [c2_glob]; // assign address of current line's highpass part wcphi = & img [c2_glob + sl1]; // copy lowpass part to temporary memory copydouble (wcplo, tmp, sl1); // perform lowpass reconstruction uplo (wcplo, sl1, templo); // perform highpass reconstruction uphi (wcphi, sl1, temphi); // combine reconstructed parts and write back to current line adddouble (templo, temphi, sl1 * 2, wcplo); } // loop over lines along first dimension ('columns') of result image // update current row / column size sl2 *= 2; sl1 *= 2; sl3 *= 2; } // loop over levels of backwards DWT } // omp parallel }
//----------------------------------------------------------------------------//
// Matrix product C = A * B.
//
// The column index range of A has to coincide with the row index range of B,
// otherwise Matpack.Error() is called. The result covers the row range of A
// and the column range of B.
//
// With _MATPACK_USE_BLAS_ defined the product is delegated to dtrmm (as soon
// as one factor is reported triangular by LT()/UT(), testing B before A) or
// to dgemm (otherwise). Without it a plain triple loop is used that skips
// zero elements of A.
//----------------------------------------------------------------------------//
Matrix operator * (const Matrix& A, const Matrix& B)
{
    if ( ! (A.Clo() == B.Rlo() && A.Chi() == B.Rhi()) )
        Matpack.Error("Matrix operator * (const Matrix&, const Matrix&): "
                      "non conformant arguments\n");

    // the result matrix
    Matrix C(A.Rlo(),A.Rhi(),B.Clo(),B.Chi());

#if defined ( _MATPACK_USE_BLAS_ )

    //------------------------------------------------------------------------//
    // BLAS version
    //------------------------------------------------------------------------//

    // Step 1: find out whether one factor is triangular. The triangular
    // factor is handed to dtrmm as such, the other one is copied into C
    // because dtrmm overwrites its second operand with the product.
    const Matrix *tri   = 0;     // triangular factor, stays 0 for GM*GM
    const Matrix *other = 0;     // factor that is copied into C
    char side_code = 'L';
    char part_code = 'U';

    if ( LT(B) ) {               // full matrix * lower triangle
#ifdef DEBUG
        cout << "GM*LT\n";
#endif
        tri = &B;  other = &A;  side_code = 'L';  part_code = 'U';

    } else if ( UT(B) ) {        // full matrix * upper triangle
#ifdef DEBUG
        cout << "GM*UT\n";
#endif
        tri = &B;  other = &A;  side_code = 'L';  part_code = 'L';

    } else if ( LT(A) ) {        // lower triangle * full matrix
#ifdef DEBUG
        cout << "LT*GM\n";
#endif
        tri = &A;  other = &B;  side_code = 'R';  part_code = 'U';

    } else if ( UT(A) ) {        // upper triangle * full matrix
#ifdef DEBUG
        cout << "UT*GM\n";
#endif
        tri = &A;  other = &B;  side_code = 'R';  part_code = 'L';

    } else {                     // full matrix * full matrix
#ifdef DEBUG
        cout << "GM*GM\n";
#endif
    }

    // Step 2: call the matching BLAS routine.
    if ( tri != 0 ) {

        checksquare(*tri);

        // C starts out as a copy of the non-triangular factor
        copyvec(C.Store(),other->Store(),other->Elements());

        charT side(side_code), part(part_code), transc('N'), diag('N');
        intT  m(C.Cols()), n(C.Rows()), ldt(tri->Cols()), ldc(C.Cols());
        doubleT alpha(1.0);

        F77NAME(dtrmm)(&side,&part,&transc,&diag,&m,&n,
                       &alpha,tri->Store(),&ldt,
                       C.Store(),&ldc);

    } else {

        // operands are passed in swapped order (B first, then A)
        charT t('N');
        intT  m(B.Cols()), n(A.Rows()), k(B.Rows()),
              lda(A.Cols()), ldb(B.Cols()), ldc(C.Cols());
        doubleT alpha(1.0), beta(0.0);

        F77NAME(dgemm)(&t,&t, &m,&n,&k,
                       &alpha,B.Store(),&ldb,
                       A.Store(),&lda,
                       &beta,C.Store(),&ldc);
    }

#else

    //------------------------------------------------------------------------//
    // non-BLAS version
    //------------------------------------------------------------------------//

    const int inner_lo = A.cl, inner_hi = A.ch,
              row_lo   = A.rl, row_hi   = A.rh,
              col_lo   = B.cl, col_hi   = B.ch;

    // raw row pointers instead of the index operator, which optimizes badly
    double **a = A.M,
           **b = B.M,
           **c = C.M;

    for (int i = row_lo; i <= row_hi; i++) {

        // clear row i of the result
        for (int j = col_lo; j <= col_hi; j++)
            c[i][j] = 0.0;

        // add a[i][l] times row l of B, skipping zero elements of A
        for (int l = inner_lo; l <= inner_hi; l++) {
            const double factor = a[i][l];
            if ( factor == 0.0 )
                continue;
            for (int j = col_lo; j <= col_hi; j++)
                c[i][j] += factor * b[l][j];
        }
    }

#endif

    return C.Value();
}