/*************************************************************************
Complex LU decomposition with L and U returned as separate matrices.

A is received by value, so the caller's matrix is left untouched; the local
copy is factorized by ComplexLUDecomposition and then split into L and U.
All arrays use 1-based indexing.

Input parameters:
    A       -   matrix to factorize, read as [1..M, 1..N]
    M       -   number of rows
    N       -   number of columns

Output parameters:
    L       -   array[1..M, 1..min(M,N)]: ones on the diagonal, zeros above
                it, entries of the packed factorization below it
    U       -   array[1..min(M,N), 1..N]: zeros below the diagonal, entries
                of the packed factorization on and above it
    Pivots  -   filled by ComplexLUDecomposition

When M=0 or N=0 the routine returns immediately and no output is modified.
*************************************************************************/
void complexludecompositionunpacked(ap::complex_2d_array a,
     int m,
     int n,
     ap::complex_2d_array& l,
     ap::complex_2d_array& u,
     ap::integer_1d_array& pivots)
{
    if( m==0||n==0 )
    {
        return;
    }

    const int kmin = ap::minint(m, n);
    l.setbounds(1, m, 1, kmin);
    u.setbounds(1, kmin, 1, n);
    complexludecomposition(a, m, n, pivots);

    //
    // L: strictly lower part comes from the packed result, unit diagonal
    //
    for(int row = 1; row <= m; row++)
    {
        for(int col = 1; col <= kmin; col++)
        {
            if( col<row )
            {
                l(row,col) = a(row,col);
            }
            else if( col==row )
            {
                l(row,col) = 1;
            }
            else
            {
                l(row,col) = 0;
            }
        }
    }

    //
    // U: diagonal and upper part come from the packed result
    //
    for(int row = 1; row <= kmin; row++)
    {
        for(int col = 1; col <= n; col++)
        {
            if( col>=row )
            {
                u(row,col) = a(row,col);
            }
            else
            {
                u(row,col) = 0;
            }
        }
    }
}
/******************************************************************** complex rank-1 kernel ********************************************************************/ bool ialglib::_i_cmatrixrank1f(int m, int n, ap::complex_2d_array& a, int ia, int ja, ap::complex_1d_array& u, int uoffs, ap::complex_1d_array& v, int voffs) { ap::complex *arow, *pu, *pv, *vtmp, *dst; int n2 = n/2; int stride = a.getstride(); int i, j; // // update pairs of rows // arow = &a(ia,ja); pu = &u(uoffs); vtmp = &v(voffs); for(i=0; i<m; i++, arow+=stride, pu++) { // // update by two // for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2) { double ux = pu[0].x; double uy = pu[0].y; double v0x = pv[0].x; double v0y = pv[0].y; double v1x = pv[1].x; double v1y = pv[1].y; dst[0].x += ux*v0x-uy*v0y; dst[0].y += ux*v0y+uy*v0x; dst[1].x += ux*v1x-uy*v1y; dst[1].y += ux*v1y+uy*v1x; //dst[0] += pu[0]*pv[0]; //dst[1] += pu[0]*pv[1]; } // // final update // if( n%2!=0 ) dst[0] += pu[0]*pv[0]; } return true; }
/*************************************************************************
Allocates A0 as an NxN complex matrix (via setlength) and sets every
element to zero.
*************************************************************************/
static void cmatrixgenzero(ap::complex_2d_array& a0, int n)
{
    a0.setlength(n, n);
    for(int row = 0; row < n; row++)
    {
        for(int col = 0; col < n; col++)
        {
            a0(row,col) = 0;
        }
    }
}
/************************************************************************* Generation of random NxN Hermitian positive definite matrix with given condition number and norm2(A)=1 INPUT PARAMETERS: N - matrix size C - condition number (in 2-norm) OUTPUT PARAMETERS: A - random HPD matrix with norm2(A)=1 and cond(A)=C -- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ void hpdmatrixrndcond(int n, double c, ap::complex_2d_array& a) { int i; int j; double l1; double l2; // // Special cases // if( n<=0||ap::fp_less(c,1) ) { return; } a.setbounds(0, n-1, 0, n-1); if( n==1 ) { a(0,0) = 1; return; } // // Prepare matrix // l1 = 0; l2 = log(1/c); for(i = 0; i <= n-1; i++) { for(j = 0; j <= n-1; j++) { a(i,j) = 0; } } a(0,0) = exp(l1); for(i = 1; i <= n-2; i++) { a(i,i) = exp(ap::randomreal()*(l2-l1)+l1); } a(n-1,n-1) = exp(l2); // // Multiply // hmatrixrndmultiply(a, n); }
/*************************************************************************
Copies the leading MxN part of A (zero-based) into B.
B is resized to [0..M-1, 0..N-1] before copying.
*************************************************************************/
static void makeacopy(const ap::complex_2d_array& a, int m, int n, ap::complex_2d_array& b)
{
    b.setbounds(0, m-1, 0, n-1);
    for(int row = 0; row < m; row++)
    {
        for(int col = 0; col < n; col++)
        {
            b(row,col) = a(row,col);
        }
    }
}
/************************************************************************* Generation of random NxN Hermitian matrix with given condition number and norm2(A)=1 INPUT PARAMETERS: N - matrix size C - condition number (in 2-norm) OUTPUT PARAMETERS: A - random matrix with norm2(A)=1 and cond(A)=C -- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ void hmatrixrndcond(int n, double c, ap::complex_2d_array& a) { int i; int j; double l1; double l2; ap::ap_error::make_assertion(n>=1&&ap::fp_greater_eq(c,1), "HMatrixRndCond: N<1 or C<1!"); a.setbounds(0, n-1, 0, n-1); if( n==1 ) { // // special case // a(0,0) = 2*ap::randominteger(2)-1; return; } // // Prepare matrix // l1 = 0; l2 = log(1/c); for(i = 0; i <= n-1; i++) { for(j = 0; j <= n-1; j++) { a(i,j) = 0; } } a(0,0) = exp(l1); for(i = 1; i <= n-2; i++) { a(i,i) = (2*ap::randominteger(2)-1)*exp(ap::randomreal()*(l2-l1)+l1); } a(n-1,n-1) = exp(l2); // // Multiply // hmatrixrndmultiply(a, n); }
/************************************************************************* Generation of random NxN complex matrix with given condition number C and norm2(A)=1 INPUT PARAMETERS: N - matrix size C - condition number (in 2-norm) OUTPUT PARAMETERS: A - random matrix with norm2(A)=1 and cond(A)=C -- ALGLIB routine -- 04.12.2009 Bochkanov Sergey *************************************************************************/ void cmatrixrndcond(int n, double c, ap::complex_2d_array& a) { int i; int j; double l1; double l2; hqrndstate state; ap::complex v; ap::ap_error::make_assertion(n>=1&&ap::fp_greater_eq(c,1), "CMatrixRndCond: N<1 or C<1!"); a.setbounds(0, n-1, 0, n-1); if( n==1 ) { // // special case // hqrndrandomize(state); hqrndunit2(state, v.x, v.y); a(0,0) = v; return; } l1 = 0; l2 = log(1/c); for(i = 0; i <= n-1; i++) { for(j = 0; j <= n-1; j++) { a(i,j) = 0; } } a(0,0) = exp(l1); for(i = 1; i <= n-2; i++) { a(i,i) = exp(ap::randomreal()*(l2-l1)+l1); } a(n-1,n-1) = exp(l2); cmatrixrndorthogonalfromtheleft(a, n, n); cmatrixrndorthogonalfromtheright(a, n, n); }
/*************************************************************************
Generation of a random Haar distributed orthogonal complex matrix

INPUT PARAMETERS:
    N   -   matrix size, N>=1 (enforced with an assertion)

OUTPUT PARAMETERS:
    A   -   orthogonal NxN matrix, array[0..N-1,0..N-1]

The matrix is initialized to the identity and then transformed by
CMatrixRndOrthogonalFromTheRight.

  -- ALGLIB routine --
     04.12.2009
     Bochkanov Sergey
*************************************************************************/
void cmatrixrndorthogonal(int n, ap::complex_2d_array& a)
{
    ap::ap_error::make_assertion(n>=1, "CMatrixRndOrthogonal: N<1!");
    a.setbounds(0, n-1, 0, n-1);

    //
    // Identity matrix
    //
    for(int row = 0; row < n; row++)
    {
        for(int col = 0; col < n; col++)
        {
            a(row,col) = (row==col) ? 1 : 0;
        }
    }

    cmatrixrndorthogonalfromtheright(a, n, n);
}
/************************************************************************* Finding the eigenvalues and eigenvectors of a Hermitian matrix The algorithm finds eigen pairs of a Hermitian matrix by reducing it to real tridiagonal form and using the QL/QR algorithm. Input parameters: A - Hermitian matrix which is given by its upper or lower triangular part. Array whose indexes range within [0..N-1, 0..N-1]. N - size of matrix A. IsUpper - storage format. ZNeeded - flag controlling whether the eigenvectors are needed or not. If ZNeeded is equal to: * 0, the eigenvectors are not returned; * 1, the eigenvectors are returned. Output parameters: D - eigenvalues in ascending order. Array whose index ranges within [0..N-1]. Z - if ZNeeded is equal to: * 0, Z hasn’t changed; * 1, Z contains the eigenvectors. Array whose indexes range within [0..N-1, 0..N-1]. The eigenvectors are stored in the matrix columns. Result: True, if the algorithm has converged. False, if the algorithm hasn't converged (rare case). Note: eigen vectors of Hermitian matrix are defined up to multiplication by a complex number L, such as |L|=1. 
-- ALGLIB -- Copyright 2005, 23 March 2007 by Bochkanov Sergey *************************************************************************/ bool hmatrixevd(ap::complex_2d_array a, int n, int zneeded, bool isupper, ap::real_1d_array& d, ap::complex_2d_array& z) { bool result; ap::complex_1d_array tau; ap::real_1d_array e; ap::real_1d_array work; ap::real_2d_array t; ap::complex_2d_array q; int i; int k; double v; ap::ap_error::make_assertion(zneeded==0||zneeded==1, "HermitianEVD: incorrect ZNeeded"); // // Reduce to tridiagonal form // hmatrixtd(a, n, isupper, tau, d, e); if( zneeded==1 ) { hmatrixtdunpackq(a, n, isupper, tau, q); zneeded = 2; } // // TDEVD // result = smatrixtdevd(d, e, n, zneeded, t); // // Eigenvectors are needed // Calculate Z = Q*T = Re(Q)*T + i*Im(Q)*T // if( result&&zneeded!=0 ) { work.setbounds(0, n-1); z.setbounds(0, n-1, 0, n-1); for(i = 0; i <= n-1; i++) { // // Calculate real part // for(k = 0; k <= n-1; k++) { work(k) = 0; } for(k = 0; k <= n-1; k++) { v = q(i,k).x; ap::vadd(&work(0), &t(k, 0), ap::vlen(0,n-1), v); } for(k = 0; k <= n-1; k++) { z(i,k).x = work(k); } // // Calculate imaginary part // for(k = 0; k <= n-1; k++) { work(k) = 0; } for(k = 0; k <= n-1; k++) { v = q(i,k).y; ap::vadd(&work(0), &t(k, 0), ap::vlen(0,n-1), v); } for(k = 0; k <= n-1; k++) { z(i,k).y = work(k); } } } return result; }
/*************************************************************************
LU inverse

Replaces A, which holds a packed LU factorization (zero-based, NxN), by
the inverse of the factorized matrix.

Input parameters:
    A       -   packed LU factors; overwritten with the inverse
    Pivots  -   pivot indexes produced together with the factorization;
                entries [0..N-2] are read
    N       -   matrix size

Result:
    True on success (also when N=0, in which case nothing is done).
    False when the upper triangular factor cannot be inverted
    (CMatrixInvMatTR reported failure); A is left partially processed
    in that case.
*************************************************************************/
static bool cmatrixinvmatlu(ap::complex_2d_array& a,
     const ap::integer_1d_array& pivots,
     int n)
{
    //
    // Quick return if possible
    //
    if( n==0 )
    {
        return true;
    }

    ap::complex_1d_array work;
    work.setbounds(0, n-1);

    //
    // Form inv(U) in the upper triangle
    //
    if( !cmatrixinvmattr(a, n, true, false) )
    {
        return false;
    }

    //
    // Solve the equation inv(A)*L = inv(U) for inv(A),
    // processing columns from the last one to the first one.
    //
    for(int j = n-1; j >= 0; j--)
    {
        //
        // Save the sub-diagonal part of column j of L in WORK
        // and clear it in A.
        //
        for(int i = j+1; i <= n-1; i++)
        {
            work(i) = a(i,j);
            a(i,j) = 0;
        }

        //
        // Column j of inv(A): subtract (already computed columns
        // j+1..n-1) times the saved part of L. The last column needs
        // no correction.
        //
        if( j<n-1 )
        {
            for(int i = 0; i <= n-1; i++)
            {
                const ap::complex v = ap::vdotproduct(&a(i, j+1), 1, "N", &work(j+1), 1, "N", ap::vlen(j+1,n-1));
                a(i,j) = a(i,j)-v;
            }
        }
    }

    //
    // Apply column interchanges in reverse order, swapping through WORK.
    //
    const int stride = a.getstride();
    for(int j = n-2; j >= 0; j--)
    {
        const int jp = pivots(j);
        if( jp==j )
        {
            continue;
        }
        ap::vmove(&work(0), 1, &a(0, j), stride, "N", ap::vlen(0,n-1));
        ap::vmove(&a(0, j), stride, &a(0, jp), stride, "N", ap::vlen(0,n-1));
        ap::vmove(&a(0, jp), stride, &work(0), 1, "N", ap::vlen(0,n-1));
    }
    return true;
}
/*************************************************************************
Unsets 2D array: shrinks A to a single element [0..0, 0..0] and stores
the value 2*randomreal()-1 in it.
*************************************************************************/
static void unset2dc(ap::complex_2d_array& a)
{
    a.setbounds(0, 0, 0, 0);
    const double filler = 2*ap::randomreal()-1;
    a(0,0) = filler;
}
/*************************************************************************
Unpacking matrix Q which reduces a Hermitian matrix to a real tridiagonal
form.

Input parameters:
    A       -   the result of a HMatrixTD subroutine
    N       -   size of matrix A.
    IsUpper -   storage format (a parameter of HMatrixTD subroutine)
    Tau     -   the result of a HMatrixTD subroutine

Output parameters:
    Q       -   transformation matrix.
                array with elements [0..N-1, 0..N-1].

Q starts as the identity and is updated by N-1 elementary reflections
applied from the left. When N=0 the routine returns without touching Q.

  -- ALGLIB --
     Copyright 2005, 2007, 2008 by Bochkanov Sergey
*************************************************************************/
void hmatrixtdunpackq(const ap::complex_2d_array& a,
     const int& n,
     const bool& isupper,
     const ap::complex_1d_array& tau,
     ap::complex_2d_array& q)
{
    if( n==0 )
    {
        return;
    }

    ap::complex_1d_array v;
    ap::complex_1d_array work;

    //
    // Q := identity; V is a 1-based scratch vector for the reflectors
    //
    q.setbounds(0, n-1, 0, n-1);
    v.setbounds(1, n);
    work.setbounds(0, n-1);
    for(int row = 0; row < n; row++)
    {
        for(int col = 0; col < n; col++)
        {
            q(row,col) = (row==col) ? 1 : 0;
        }
    }

    if( isupper )
    {
        //
        // Reflector i is stored in rows 0..i of column i+1;
        // its last component is replaced by 1. It acts on rows 0..i of Q.
        //
        for(int i = 0; i <= n-2; i++)
        {
            for(int k = 1; k <= i+1; k++)
            {
                v(k) = a(k-1,i+1);
            }
            v(i+1) = 1;
            complexapplyreflectionfromtheleft(q, tau(i), v, 0, i, 0, n-1, work);
        }
    }
    else
    {
        //
        // Reflector i is stored in rows i+1..n-1 of column i;
        // its first component is replaced by 1. It acts on rows i+1..n-1
        // of Q. Reflectors are applied in reverse order.
        //
        for(int i = n-2; i >= 0; i--)
        {
            for(int k = 1; k <= n-i-1; k++)
            {
                v(k) = a(k+i,i);
            }
            v(1) = 1;
            complexapplyreflectionfromtheleft(q, tau(i), v, i+1, n-1, 0, n-1, work);
        }
    }
}
/************************************************************************* triangular inverse *************************************************************************/ static bool cmatrixinvmattr(ap::complex_2d_array& a, int n, bool isupper, bool isunittriangular) { bool result; bool nounit; int i; int j; ap::complex v; ap::complex ajj; ap::complex_1d_array t; result = true; t.setbounds(0, n-1); // // Test the input parameters. // nounit = !isunittriangular; if( isupper ) { // // Compute inverse of upper triangular matrix. // for(j = 0; j <= n-1; j++) { if( nounit ) { if( a(j,j)==0 ) { result = false; return result; } a(j,j) = 1/a(j,j); ajj = -a(j,j); } else { ajj = -1; } // // Compute elements 1:j-1 of j-th column. // if( j>0 ) { ap::vmove(&t(0), 1, &a(0, j), a.getstride(), "N", ap::vlen(0,j-1)); for(i = 0; i <= j-1; i++) { if( i<j-1 ) { v = ap::vdotproduct(&a(i, i+1), 1, "N", &t(i+1), 1, "N", ap::vlen(i+1,j-1)); } else { v = 0; } if( nounit ) { a(i,j) = v+a(i,i)*t(i); } else { a(i,j) = v+t(i); } } ap::vmul(&a(0, j), a.getstride(), ap::vlen(0,j-1), ajj); } } } else { // // Compute inverse of lower triangular matrix. // for(j = n-1; j >= 0; j--) { if( nounit ) { if( a(j,j)==0 ) { result = false; return result; } a(j,j) = 1/a(j,j); ajj = -a(j,j); } else { ajj = -1; } if( j<n-1 ) { // // Compute elements j+1:n of j-th column. // ap::vmove(&t(j+1), 1, &a(j+1, j), a.getstride(), "N", ap::vlen(j+1,n-1)); for(i = j+1; i <= n-1; i++) { if( i>j+1 ) { v = ap::vdotproduct(&a(i, j+1), 1, "N", &t(j+1), 1, "N", ap::vlen(j+1,i-1)); } else { v = 0; } if( nounit ) { a(i,j) = v+a(i,i)*t(i); } else { a(i,j) = v+t(i); } } ap::vmul(&a(j+1, j), a.getstride(), ap::vlen(j+1,n-1), ajj); } } } return result; }
/******************************************************************** complex TRSM kernel ********************************************************************/ bool ialglib::_i_cmatrixrighttrsmf(int m, int n, const ap::complex_2d_array& a, int i1, int j1, bool isupper, bool isunit, int optype, ap::complex_2d_array& x, int i2, int j2) { if( m>alglib_c_block || n>alglib_c_block ) return false; // // local buffers // double *pdiag; int i; double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __tmpbuf[2*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const xbuf = (double * const) alglib_align(__xbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // Prepare // bool uppera; mcopyblock_complex(n, n, &a(i1,j1), optype, a.getstride(), abuf); mcopyblock_complex(m, n, &x(i2,j2), 0, x.getstride(), xbuf); if( isunit ) for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1)) { pdiag[0] = 1.0; pdiag[1] = 0.0; } if( optype==0 ) uppera = isupper; else uppera = !isupper; // // Solve Y*A^-1=X where A is upper or lower triangular // if( uppera ) { for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1)) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; vcopy_complex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj"); mv_complex(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 0, &x(i2,j2), x.getstride()); } else { for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1)) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; vcopy_complex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj"); mv_complex(m, n-1-i, 
xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 0, &x(i2,j2), x.getstride()); } return true; }
/******************************************************************** complex GEMM kernel ********************************************************************/ bool ialglib::_i_cmatrixgemmf(int m, int n, int k, ap::complex alpha, const ap::complex_2d_array& _a, int ia, int ja, int optypea, const ap::complex_2d_array& _b, int ib, int jb, int optypeb, ap::complex beta, ap::complex_2d_array& _c, int ic, int jc) { if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block ) return false; const ap::complex *arow; ap::complex *crow; int i, stride, cstride; double __abuf[2*alglib_c_block+alglib_simd_alignment]; double __b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf,alglib_simd_alignment); double * const b = (double * const) alglib_align(__b, alglib_simd_alignment); // // copy b // int brows = optypeb==0 ? k : n; int bcols = optypeb==0 ? n : k; if( optypeb==0 ) mcopyblock_complex(brows, bcols, &_b(ib,jb), 1, _b.getstride(), b); if( optypeb==1 ) mcopyblock_complex(brows, bcols, &_b(ib,jb), 0, _b.getstride(), b); if( optypeb==2 ) mcopyblock_complex(brows, bcols, &_b(ib,jb), 3, _b.getstride(), b); // // multiply B by A (from the right, by rows) // and store result in C // arow = &_a(ia,ja); crow = &_c(ic,jc); stride = _a.getstride(); cstride = _c.getstride(); for(i=0; i<m; i++) { if( optypea==0 ) { vcopy_complex(k, arow, 1, abuf, 1, "No conj"); arow += stride; } else if( optypea==1 ) { vcopy_complex(k, arow, stride, abuf, 1, "No conj"); arow++; } else { vcopy_complex(k, arow, stride, abuf, 1, "Conj"); arow++; } if( beta==0 ) vzero_complex(n, crow, 1); mv_complex(n, k, b, abuf, crow, NULL, 1, alpha, beta); crow += cstride; } return true; }
/*************************************************************************
Unpacks the transformation matrix Q from packed reflectors stored in A,
using 1-based indexing.

Input parameters:
    A       -   matrix holding the reflectors, read as [1..N, 1..N]
    N       -   matrix size
    IsUpper -   selects where the reflectors are read from (above or
                below the diagonal)
    Tau     -   scalar factors of the reflectors, Tau(1..N-1) are used

Output parameters:
    Q       -   array[1..N, 1..N]; initialized to the identity and then
                updated by N-1 reflections applied from the left.

When N=0 the routine returns without touching Q.

  -- ALGLIB --
     Copyright 2005, 2007 by Bochkanov Sergey
*************************************************************************/
void unpackqfromhermitiantridiagonal(const ap::complex_2d_array& a,
     const int& n,
     const bool& isupper,
     const ap::complex_1d_array& tau,
     ap::complex_2d_array& q)
{
    if( n==0 )
    {
        return;
    }

    ap::complex_1d_array v;
    ap::complex_1d_array work;

    //
    // Q := identity
    //
    q.setbounds(1, n, 1, n);
    v.setbounds(1, n);
    work.setbounds(1, n);
    for(int row = 1; row <= n; row++)
    {
        for(int col = 1; col <= n; col++)
        {
            q(row,col) = (row==col) ? 1 : 0;
        }
    }

    if( isupper )
    {
        //
        // Reflector i is read from rows 1..i of column i+1;
        // its last component is replaced by 1. It acts on rows 1..i of Q.
        //
        for(int i = 1; i <= n-1; i++)
        {
            for(int k = 1; k <= i; k++)
            {
                v(k) = a(k,i+1);
            }
            v(i) = 1;
            complexapplyreflectionfromtheleft(q, tau(i), v, 1, i, 1, n, work);
        }
    }
    else
    {
        //
        // Reflector i is read from rows i+1..n of column i;
        // its first component is replaced by 1. It acts on rows i+1..n
        // of Q. Reflectors are applied in reverse order.
        //
        for(int i = n-1; i >= 1; i--)
        {
            for(int k = 1; k <= n-i; k++)
            {
                v(k) = a(k+i,i);
            }
            v(1) = 1;
            complexapplyreflectionfromtheleft(q, tau(i), v, i+1, n, 1, n, work);
        }
    }
}
/******************************************************************** complex SYRK kernel ********************************************************************/ bool ialglib::_i_cmatrixsyrkf(int n, int k, double alpha, const ap::complex_2d_array& a, int ia, int ja, int optypea, double beta, ap::complex_2d_array& c, int ic, int jc, bool isupper) { if( n>alglib_c_block || k>alglib_c_block ) return false; if( n==0 ) return true; // // local buffers // double *arow, *crow; int i; double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __tmpbuf[2*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const cbuf = (double * const) alglib_align(__cbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // copy A and C, task is transformed to "A*A^H"-form. // if beta==0, then C is filled by zeros (and not referenced) // // alpha==0 or k==0 are correctly processed (A is not referenced) // if( alpha==0 ) k = 0; if( k>0 ) { if( optypea==0 ) mcopyblock_complex(n, k, &a(ia,ja), 3, a.getstride(), abuf); else mcopyblock_complex(k, n, &a(ia,ja), 1, a.getstride(), abuf); } mcopyblock_complex(n, n, &c(ic,jc), 0, c.getstride(), cbuf); if( beta==0 ) { for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block) if( isupper ) vzero(2*(n-i), crow+2*i, 1); else vzero(2*(i+1), crow, 1); } // // update C // if( isupper ) { for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block) { vcopy_complex(k, arow, 1, tmpbuf, 1, "Conj"); mv_complex(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, alpha, beta); } } else { for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block) { vcopy_complex(k, arow, 1, tmpbuf, 1, "Conj"); mv_complex(i+1, k, abuf, tmpbuf, NULL, crow, 1, alpha, beta); } } // // copy back // mcopyunblock_complex(n, n, 
cbuf, 0, &c(ic,jc), c.getstride()); return true; }
/******************************************************************** complex TRSM kernel ********************************************************************/ bool ialglib::_i_cmatrixlefttrsmf(int m, int n, const ap::complex_2d_array& a, int i1, int j1, bool isupper, bool isunit, int optype, ap::complex_2d_array& x, int i2, int j2) { if( m>alglib_c_block || n>alglib_c_block ) return false; // // local buffers // double *pdiag, *arow; int i; double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __tmpbuf[2*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const xbuf = (double * const) alglib_align(__xbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // Prepare // Transpose X (so we may use mv, which calculates A*x, but not x*A) // bool uppera; mcopyblock_complex(m, m, &a(i1,j1), optype, a.getstride(), abuf); mcopyblock_complex(m, n, &x(i2,j2), 1, x.getstride(), xbuf); if( isunit ) for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1)) { pdiag[0] = 1.0; pdiag[1] = 0.0; } if( optype==0 ) uppera = isupper; else uppera = !isupper; // // Solve A^-1*Y^T=X^T where A is upper or lower triangular // if( uppera ) { for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1)) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; vcopy_complex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj"); mv_complex(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 1, &x(i2,j2), x.getstride()); } else { for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; 
vcopy_complex(i, arow, 1, tmpbuf, 1, "No conj"); mv_complex(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 1, &x(i2,j2), x.getstride()); } return true; }