/**
 * Fill a complex-valued basis matrix for the points (kx, ky).
 *
 * This is a thin wrapper: it checks the shapes and forwards to mBasis with a
 * null invsig pointer, i.e. without any per-point weighting.
 * (Presumably the Fourier-space counterpart of basis(), judging by the name
 * and the complex output type -- confirm against the class documentation.)
 *
 * @param kx     First coordinate of each point.
 * @param ky     Second coordinate of each point; must have the same size as kx.
 * @param psi_k  Output matrix, kx.size() rows by PQIndex::size(order) columns.
 * @param order  Basis order; sets the required number of columns.
 * @param sigma  Forwarded unchanged to mBasis.
 */
void LVector::kBasis(
    const tmv::ConstVectorView<double>& kx,
    const tmv::ConstVectorView<double>& ky,
    tmv::MatrixView<std::complex<double> > psi_k,
    int order,
    double sigma)
{
    // One row per input point ...
    assert(ky.size() == kx.size() && psi_k.nrows() == kx.size());
    // ... and one column per basis element up to the requested order.
    assert(psi_k.ncols()==PQIndex::size(order));

    // Null invsig: mBasis skips its per-point multiplication.
    const tmv::ConstVectorView<double>* const no_invsig = 0;
    mBasis(kx, ky, no_invsig, psi_k, order, sigma);
}
/**
 * Fill a real-valued basis matrix for the points (x, y).
 *
 * This is a thin wrapper: it checks the shapes and forwards to mBasis with a
 * null invsig pointer, i.e. without any per-point weighting.
 *
 * @param x      First coordinate of each point.
 * @param y      Second coordinate of each point; must have the same size as x.
 * @param psi    Output matrix, x.size() rows by PQIndex::size(order) columns.
 * @param order  Basis order; sets the required number of columns.
 * @param sigma  Forwarded unchanged to mBasis.
 */
void LVector::basis(
    const tmv::ConstVectorView<double>& x,
    const tmv::ConstVectorView<double>& y,
    tmv::MatrixView<double> psi,
    int order,
    double sigma)
{
    // One row per input point ...
    assert(y.size() == x.size() && psi.nrows() == x.size());
    // ... and one column per basis element up to the requested order.
    assert(psi.ncols()==PQIndex::size(order));

    // Null invsig: mBasis skips its per-point multiplication.
    const tmv::ConstVectorView<double>* const no_invsig = 0;
    mBasis(x, y, no_invsig, psi, order, sigma);
}
/**
 * Shared worker behind basis() and kBasis(): fills psi with one row per input
 * point and one column per PQIndex element up to the given order.
 *
 * The points are processed in blocks of BLOCKING_FACTOR rows.  For each block:
 *   1. the q=0 columns are built by repeated multiplication, starting from the
 *      Gaussian factor exp(-r^2/2) (after mBasisHelper<T>::applyPrefactor);
 *   2. the q>=1 columns are obtained by multiplying the q=0 column(s) of the
 *      same m by the factors Lmq, generated with a three-term recurrence in q.
 *
 * NOTE(review): T is not declared inside this block; presumably it is a
 * template parameter introduced by a `template <typename T>` header that
 * precedes this definition -- confirm.
 *
 * @param x       First coordinate of each point.
 * @param y       Second coordinate of each point; same size as x.
 * @param invsig  Optional per-point factors.  If non-null, the m=0 column is
 *                multiplied point-wise by these values before anything else
 *                is derived from it; pass a null pointer to skip this.
 * @param psi     Output matrix, x.size() rows by PQIndex::size(order) columns.
 * @param order   Maximum order of the basis elements to generate.
 * @param sigma   Passed to mBasisHelper<T>::applyPrefactor.
 */
void LVector::mBasis(
    const tmv::ConstVectorView<double>& x, const tmv::ConstVectorView<double>& y,
    const tmv::ConstVectorView<double>* invsig,
    tmv::MatrixView<T> psi, int order, double sigma)
{
    assert (y.size()==x.size());
    assert (psi.nrows()==x.size() && psi.ncols()==PQIndex::size(order));

    const int N=order;
    const int npts_full = x.size();

    // With no points there is nothing to fill; return before creating any
    // (zero-sized) scratch storage.
    if (npts_full == 0) return;

    // It's faster to build the psi matrix in blocks so that more of the working set
    // stays in cache.  A column of 4096 doubles is 32 KB, so a (typical) 256 KB L2
    // cache holds 8 such columns, which is pretty good, since we are usually working
    // on 4 columns at a time, plus either X and Y or 3 Lq vectors.
    const int BLOCKING_FACTOR=4096;

    // No block ever uses more than BLOCKING_FACTOR rows (and never more than
    // npts_full), so the scratch storage only needs the smaller of the two.
    // Sizing it with the larger one would make the temporaries grow with the
    // total number of points.
    const int max_npts = std::min(BLOCKING_FACTOR,npts_full);
    tmv::DiagMatrix<double> Rsq_full(max_npts);
    tmv::Matrix<double> A_full(max_npts,2);
    tmv::Matrix<double> tmp_full(max_npts,2);
    tmv::DiagMatrix<double> Lmq_full(max_npts);
    tmv::DiagMatrix<double> Lmqm1_full(max_npts);
    tmv::DiagMatrix<double> Lmqm2_full(max_npts);

    for (int ilo=0; ilo<npts_full; ilo+=BLOCKING_FACTOR) {
        const int ihi = std::min(npts_full, ilo + BLOCKING_FACTOR);
        const int npts = ihi-ilo;

        // Cast arguments as diagonal matrices so we can access
        // vectorized element-by-element multiplication
        tmv::ConstDiagMatrixView<double> X = DiagMatrixViewOf(x.subVector(ilo,ihi));
        tmv::ConstDiagMatrixView<double> Y = DiagMatrixViewOf(y.subVector(ilo,ihi));

        // Get the appropriate portion of our temporary matrices.
        // (The last block may be shorter than BLOCKING_FACTOR.)
        tmv::DiagMatrixView<double> Rsq = Rsq_full.subDiagMatrix(0,npts);
        // A keeps track of real & imag parts (columns 0 and 1) of
        // prefactor * exp(-r^2/2) (x+iy)^m / sqrt(m!)
        tmv::MatrixView<double> A = A_full.rowRange(0,npts);
        tmv::MatrixView<double> tmp = tmp_full.rowRange(0,npts);

        // We need rsq values twice, so store them here.
        Rsq = X*X;
        Rsq += Y*Y;

        // Build the Gaussian factor
        for (int i=0; i<npts; i++) A.ref(i,0) = std::exp(-0.5*Rsq(i));
        mBasisHelper<T>::applyPrefactor(A.col(0),sigma);
        A.col(1).setZero();

        // Put 1/sigma factor into every point if doing a design matrix:
        if (invsig) A.col(0) *= tmv::DiagMatrixViewOf(invsig->subVector(ilo,ihi));

        // Assign the m=0 column first:
        psi.col( PQIndex(0,0).rIndex(), ilo,ihi ) = A.col(0);

        // Then ascend m's at q=0:
        for (int m=1; m<=N; m++) {
            int rIndex = PQIndex(m,0).rIndex();
            // Multiply by (X+iY)/sqrt(m), including a factor 2 first time through
            tmp = Y * A;
            A = X * A;
            A.col(0) += tmp.col(1);
            A.col(1) -= tmp.col(0);
            A *= m==1 ? 2. : 1./sqrtn(m);

            // Each m>0 occupies two adjacent columns of psi.
            psi.subMatrix(ilo,ihi,rIndex,rIndex+2) = mBasisHelper<T>::Asign(m%4) * A;
        }

        // Make three DiagMatrix to hold Lmq's during recurrence calculations.
        // They are held through pointers so that the roles can be rotated
        // below by swapping pointers instead of copying data.
        boost::shared_ptr<tmv::DiagMatrixView<double> > Lmq(
            new tmv::DiagMatrixView<double>(Lmq_full.subDiagMatrix(0,npts)));
        boost::shared_ptr<tmv::DiagMatrixView<double> > Lmqm1(
            new tmv::DiagMatrixView<double>(Lmqm1_full.subDiagMatrix(0,npts)));
        boost::shared_ptr<tmv::DiagMatrixView<double> > Lmqm2(
            new tmv::DiagMatrixView<double>(Lmqm2_full.subDiagMatrix(0,npts)));

        for (int m=0; m<=N; m++) {
            PQIndex pq(m,0);
            int iQ0 = pq.rIndex();
            // Go to q=1:
            pq.incN();
            if (pq.pastOrder(N)) continue;

            { // q == 1
                const int p = pq.getP();
                const int q = pq.getQ();
                const int iQ = pq.rIndex();

                Lmqm1->setAllTo(1.); // This is Lm0.
                *Lmq = Rsq - (p+q-1.);
                *Lmq *= mBasisHelper<T>::Lsign(1.) / (sqrtn(p)*sqrtn(q));

                // m==0 has a single column; m>0 has a pair of columns.
                if (m==0) {
                    psi.col(iQ,ilo,ihi) = (*Lmq) * psi.col(iQ0,ilo,ihi);
                } else {
                    psi.subMatrix(ilo,ihi,iQ,iQ+2) = (*Lmq) * psi.subMatrix(ilo,ihi,iQ0,iQ0+2);
                }
            }

            // do q=2,...
            for (pq.incN(); !pq.pastOrder(N); pq.incN()) {
                const int p = pq.getP();
                const int q = pq.getQ();
                const int iQ = pq.rIndex();

                // cycle the Lmq vectors
                // Lmqm2 <- Lmqm1
                // Lmqm1 <- Lmq
                // Lmq   <- Lmqm2
                Lmqm2.swap(Lmqm1);
                Lmqm1.swap(Lmq);

                double invsqrtpq = 1./sqrtn(p)/sqrtn(q);
                *Lmq = Rsq - (p+q-1.);
                *Lmq *= mBasisHelper<T>::Lsign(invsqrtpq) * *Lmqm1;
                *Lmq -= (sqrtn(p-1)*sqrtn(q-1)*invsqrtpq) * (*Lmqm2);

                if (m==0) {
                    psi.col(iQ,ilo,ihi) = (*Lmq) * psi.col(iQ0,ilo,ihi);
                } else {
                    psi.subMatrix(ilo,ihi,iQ,iQ+2) = (*Lmq) * psi.subMatrix(ilo,ihi,iQ0,iQ0+2);
                }
            }
        }
    }
}