/******************************************************************** complex rank-1 kernel ********************************************************************/ bool ialglib::_i_cmatrixrank1f(int m, int n, ap::complex_2d_array& a, int ia, int ja, ap::complex_1d_array& u, int uoffs, ap::complex_1d_array& v, int voffs) { ap::complex *arow, *pu, *pv, *vtmp, *dst; int n2 = n/2; int stride = a.getstride(); int i, j; // // update pairs of rows // arow = &a(ia,ja); pu = &u(uoffs); vtmp = &v(voffs); for(i=0; i<m; i++, arow+=stride, pu++) { // // update by two // for(j=0,pv=vtmp, dst=arow; j<n2; j++, dst+=2, pv+=2) { double ux = pu[0].x; double uy = pu[0].y; double v0x = pv[0].x; double v0y = pv[0].y; double v1x = pv[1].x; double v1y = pv[1].y; dst[0].x += ux*v0x-uy*v0y; dst[0].y += ux*v0y+uy*v0x; dst[1].x += ux*v1x-uy*v1y; dst[1].y += ux*v1y+uy*v1x; //dst[0] += pu[0]*pv[0]; //dst[1] += pu[0]*pv[1]; } // // final update // if( n%2!=0 ) dst[0] += pu[0]*pv[0]; } return true; }
/************************************************************************* LU inverse *************************************************************************/ static bool cmatrixinvmatlu(ap::complex_2d_array& a, const ap::integer_1d_array& pivots, int n) { bool result; ap::complex_1d_array work; int i; int iws; int j; int jb; int jj; int jp; ap::complex v; result = true; // // Quick return if possible // if( n==0 ) { return result; } work.setbounds(0, n-1); // // Form inv(U) // if( !cmatrixinvmattr(a, n, true, false) ) { result = false; return result; } // // Solve the equation inv(A)*L = inv(U) for inv(A). // for(j = n-1; j >= 0; j--) { // // Copy current column of L to WORK and replace with zeros. // for(i = j+1; i <= n-1; i++) { work(i) = a(i,j); a(i,j) = 0; } // // Compute current column of inv(A). // if( j<n-1 ) { for(i = 0; i <= n-1; i++) { v = ap::vdotproduct(&a(i, j+1), 1, "N", &work(j+1), 1, "N", ap::vlen(j+1,n-1)); a(i,j) = a(i,j)-v; } } } // // Apply column interchanges. // for(j = n-2; j >= 0; j--) { jp = pivots(j); if( jp!=j ) { ap::vmove(&work(0), 1, &a(0, j), a.getstride(), "N", ap::vlen(0,n-1)); ap::vmove(&a(0, j), a.getstride(), &a(0, jp), a.getstride(), "N", ap::vlen(0,n-1)); ap::vmove(&a(0, jp), a.getstride(), &work(0), 1, "N", ap::vlen(0,n-1)); } } return result; }
/************************************************************************* triangular inverse *************************************************************************/ static bool cmatrixinvmattr(ap::complex_2d_array& a, int n, bool isupper, bool isunittriangular) { bool result; bool nounit; int i; int j; ap::complex v; ap::complex ajj; ap::complex_1d_array t; result = true; t.setbounds(0, n-1); // // Test the input parameters. // nounit = !isunittriangular; if( isupper ) { // // Compute inverse of upper triangular matrix. // for(j = 0; j <= n-1; j++) { if( nounit ) { if( a(j,j)==0 ) { result = false; return result; } a(j,j) = 1/a(j,j); ajj = -a(j,j); } else { ajj = -1; } // // Compute elements 1:j-1 of j-th column. // if( j>0 ) { ap::vmove(&t(0), 1, &a(0, j), a.getstride(), "N", ap::vlen(0,j-1)); for(i = 0; i <= j-1; i++) { if( i<j-1 ) { v = ap::vdotproduct(&a(i, i+1), 1, "N", &t(i+1), 1, "N", ap::vlen(i+1,j-1)); } else { v = 0; } if( nounit ) { a(i,j) = v+a(i,i)*t(i); } else { a(i,j) = v+t(i); } } ap::vmul(&a(0, j), a.getstride(), ap::vlen(0,j-1), ajj); } } } else { // // Compute inverse of lower triangular matrix. // for(j = n-1; j >= 0; j--) { if( nounit ) { if( a(j,j)==0 ) { result = false; return result; } a(j,j) = 1/a(j,j); ajj = -a(j,j); } else { ajj = -1; } if( j<n-1 ) { // // Compute elements j+1:n of j-th column. // ap::vmove(&t(j+1), 1, &a(j+1, j), a.getstride(), "N", ap::vlen(j+1,n-1)); for(i = j+1; i <= n-1; i++) { if( i>j+1 ) { v = ap::vdotproduct(&a(i, j+1), 1, "N", &t(j+1), 1, "N", ap::vlen(j+1,i-1)); } else { v = 0; } if( nounit ) { a(i,j) = v+a(i,i)*t(i); } else { a(i,j) = v+t(i); } } ap::vmul(&a(j+1, j), a.getstride(), ap::vlen(j+1,n-1), ajj); } } } return result; }
/******************************************************************** complex TRSM kernel ********************************************************************/ bool ialglib::_i_cmatrixrighttrsmf(int m, int n, const ap::complex_2d_array& a, int i1, int j1, bool isupper, bool isunit, int optype, ap::complex_2d_array& x, int i2, int j2) { if( m>alglib_c_block || n>alglib_c_block ) return false; // // local buffers // double *pdiag; int i; double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __tmpbuf[2*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const xbuf = (double * const) alglib_align(__xbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // Prepare // bool uppera; mcopyblock_complex(n, n, &a(i1,j1), optype, a.getstride(), abuf); mcopyblock_complex(m, n, &x(i2,j2), 0, x.getstride(), xbuf); if( isunit ) for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1)) { pdiag[0] = 1.0; pdiag[1] = 0.0; } if( optype==0 ) uppera = isupper; else uppera = !isupper; // // Solve Y*A^-1=X where A is upper or lower triangular // if( uppera ) { for(i=0,pdiag=abuf; i<n; i++,pdiag+=2*(alglib_c_block+1)) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; vcopy_complex(i, abuf+2*i, alglib_c_block, tmpbuf, 1, "No conj"); mv_complex(m, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 0, &x(i2,j2), x.getstride()); } else { for(i=n-1,pdiag=abuf+2*((n-1)*alglib_c_block+(n-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1)) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; vcopy_complex(n-1-i, pdiag+2*alglib_c_block, alglib_c_block, tmpbuf, 1, "No conj"); mv_complex(m, n-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 0, &x(i2,j2), x.getstride()); } return true; }
/******************************************************************** complex GEMM kernel ********************************************************************/ bool ialglib::_i_cmatrixgemmf(int m, int n, int k, ap::complex alpha, const ap::complex_2d_array& _a, int ia, int ja, int optypea, const ap::complex_2d_array& _b, int ib, int jb, int optypeb, ap::complex beta, ap::complex_2d_array& _c, int ic, int jc) { if( m>alglib_c_block || n>alglib_c_block || k>alglib_c_block ) return false; const ap::complex *arow; ap::complex *crow; int i, stride, cstride; double __abuf[2*alglib_c_block+alglib_simd_alignment]; double __b[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf,alglib_simd_alignment); double * const b = (double * const) alglib_align(__b, alglib_simd_alignment); // // copy b // int brows = optypeb==0 ? k : n; int bcols = optypeb==0 ? n : k; if( optypeb==0 ) mcopyblock_complex(brows, bcols, &_b(ib,jb), 1, _b.getstride(), b); if( optypeb==1 ) mcopyblock_complex(brows, bcols, &_b(ib,jb), 0, _b.getstride(), b); if( optypeb==2 ) mcopyblock_complex(brows, bcols, &_b(ib,jb), 3, _b.getstride(), b); // // multiply B by A (from the right, by rows) // and store result in C // arow = &_a(ia,ja); crow = &_c(ic,jc); stride = _a.getstride(); cstride = _c.getstride(); for(i=0; i<m; i++) { if( optypea==0 ) { vcopy_complex(k, arow, 1, abuf, 1, "No conj"); arow += stride; } else if( optypea==1 ) { vcopy_complex(k, arow, stride, abuf, 1, "No conj"); arow++; } else { vcopy_complex(k, arow, stride, abuf, 1, "Conj"); arow++; } if( beta==0 ) vzero_complex(n, crow, 1); mv_complex(n, k, b, abuf, crow, NULL, 1, alpha, beta); crow += cstride; } return true; }
/******************************************************************** complex SYRK kernel ********************************************************************/ bool ialglib::_i_cmatrixsyrkf(int n, int k, double alpha, const ap::complex_2d_array& a, int ia, int ja, int optypea, double beta, ap::complex_2d_array& c, int ic, int jc, bool isupper) { if( n>alglib_c_block || k>alglib_c_block ) return false; if( n==0 ) return true; // // local buffers // double *arow, *crow; int i; double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __cbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __tmpbuf[2*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const cbuf = (double * const) alglib_align(__cbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // copy A and C, task is transformed to "A*A^H"-form. // if beta==0, then C is filled by zeros (and not referenced) // // alpha==0 or k==0 are correctly processed (A is not referenced) // if( alpha==0 ) k = 0; if( k>0 ) { if( optypea==0 ) mcopyblock_complex(n, k, &a(ia,ja), 3, a.getstride(), abuf); else mcopyblock_complex(k, n, &a(ia,ja), 1, a.getstride(), abuf); } mcopyblock_complex(n, n, &c(ic,jc), 0, c.getstride(), cbuf); if( beta==0 ) { for(i=0,crow=cbuf; i<n; i++,crow+=2*alglib_c_block) if( isupper ) vzero(2*(n-i), crow+2*i, 1); else vzero(2*(i+1), crow, 1); } // // update C // if( isupper ) { for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block) { vcopy_complex(k, arow, 1, tmpbuf, 1, "Conj"); mv_complex(n-i, k, arow, tmpbuf, NULL, crow+2*i, 1, alpha, beta); } } else { for(i=0,arow=abuf,crow=cbuf; i<n; i++,arow+=2*alglib_c_block,crow+=2*alglib_c_block) { vcopy_complex(k, arow, 1, tmpbuf, 1, "Conj"); mv_complex(i+1, k, abuf, tmpbuf, NULL, crow, 1, alpha, beta); } } // // copy back // mcopyunblock_complex(n, n, cbuf, 0, &c(ic,jc), c.getstride()); return true; }
/******************************************************************** complex TRSM kernel ********************************************************************/ bool ialglib::_i_cmatrixlefttrsmf(int m, int n, const ap::complex_2d_array& a, int i1, int j1, bool isupper, bool isunit, int optype, ap::complex_2d_array& x, int i2, int j2) { if( m>alglib_c_block || n>alglib_c_block ) return false; // // local buffers // double *pdiag, *arow; int i; double __abuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __xbuf[2*alglib_c_block*alglib_c_block+alglib_simd_alignment]; double __tmpbuf[2*alglib_c_block+alglib_simd_alignment]; double * const abuf = (double * const) alglib_align(__abuf, alglib_simd_alignment); double * const xbuf = (double * const) alglib_align(__xbuf, alglib_simd_alignment); double * const tmpbuf = (double * const) alglib_align(__tmpbuf,alglib_simd_alignment); // // Prepare // Transpose X (so we may use mv, which calculates A*x, but not x*A) // bool uppera; mcopyblock_complex(m, m, &a(i1,j1), optype, a.getstride(), abuf); mcopyblock_complex(m, n, &x(i2,j2), 1, x.getstride(), xbuf); if( isunit ) for(i=0,pdiag=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1)) { pdiag[0] = 1.0; pdiag[1] = 0.0; } if( optype==0 ) uppera = isupper; else uppera = !isupper; // // Solve A^-1*Y^T=X^T where A is upper or lower triangular // if( uppera ) { for(i=m-1,pdiag=abuf+2*((m-1)*alglib_c_block+(m-1)); i>=0; i--,pdiag-=2*(alglib_c_block+1)) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; vcopy_complex(m-1-i, pdiag+2, 1, tmpbuf, 1, "No conj"); mv_complex(n, m-1-i, xbuf+2*(i+1), tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 1, &x(i2,j2), x.getstride()); } else { for(i=0,pdiag=abuf,arow=abuf; i<m; i++,pdiag+=2*(alglib_c_block+1),arow+=2*alglib_c_block) { ap::complex beta = 1.0/ap::complex(pdiag[0],pdiag[1]); ap::complex alpha; alpha.x = -beta.x; alpha.y = -beta.y; vcopy_complex(i, arow, 1, tmpbuf, 1, "No conj"); mv_complex(n, i, xbuf, tmpbuf, NULL, xbuf+2*i, alglib_c_block, alpha, beta); } mcopyunblock_complex(m, n, xbuf, 1, &x(i2,j2), x.getstride()); } return true; }