// Produces a vector holding every element of `source`, without modifying
// `source`: a private copy of the queue is emptied with try_pop() and the
// sequence of popped elements is then reversed, so the element popped last
// ends up at the front of the returned vector.
std::vector<element_type> operator()(tbb::concurrent_priority_queue<element_type, compare_t, allocator_t> const& source) const{
     typedef tbb::concurrent_priority_queue<element_type, compare_t, allocator_t> queue_type;

     // Work on a copy; popping is destructive and `source` is const.
     queue_type scratch((source));

     std::vector<element_type> drained;
     drained.reserve(scratch.size());

     // try_pop() returns false once the copy has been emptied.
     for (element_type item; scratch.try_pop(item); ) {
         drained.push_back(item);
     }

     std::reverse(drained.begin(), drained.end());
     return drained;
 }
// Computes the parameters of a 2x2 complex shear for the index pair (p, q)
// of the n-by-n matrix A, where entry A_rc is read from A[r + c*lda].
// A is only read, never written.
//
// The transformation described by the output is
//     [ cs[0]  cs[2] ]
//     [ cs[1]  cs[0] ]
// (the lower-right entry equals cs[0] and is therefore not stored), with
//     cs[0] = cosh y
//     cs[1] =  i exp(-i alpha) sinh y
//     cs[2] = -i exp( i alpha) sinh y
//
// Quantities used:
//     c_pq   = sum_j ( A_pj conj(A_qj) - conj(A_jp) A_jq )
//     G_pq   = sum_{j != p,q} ( |A_pj|^2 + |A_qj|^2 + |A_jp|^2 + |A_jq|^2 )
//     d_pq   = A_qq - A_pp
//     alpha  = arg(c_pq) - pi/2, so exp(i alpha) = -i c_pq / |c_pq|
//     xi_pq  = exp(i alpha) A_qp + exp(-i alpha) A_pq
//     tanh y = -|c_pq| / (2 (|d_pq|^2 + |xi_pq|^2) + G_pq)
//
// When c_pq is exactly zero the identity (cs = {1, 0, 0}) is produced.
// The return value is always 0.
static inline double
pnrj_shear(size_t n, size_t lda, std::complex<double> *A, size_t p, size_t q, std::complex<double> cs[3]) {
    typedef std::complex<double> cplx;

    cplx commutator(0.0, 0.0);   // c_pq
    double off_weight = 0.0;     // G_pq

    for (size_t k = 0; k < n; ++k) {
        const cplx &a_pk = A[p + k*lda];
        const cplx &a_qk = A[q + k*lda];
        const cplx &a_kp = A[k + p*lda];
        const cplx &a_kq = A[k + q*lda];

        // Every k, including p and q themselves, contributes to c_pq.
        commutator += (a_pk*std::conj(a_qk) - std::conj(a_kp)*a_kq);

        // G_pq only collects entries outside the (p, q) 2x2 sub-block.
        if (k != p && k != q) {
            off_weight += std::norm(a_pk) + std::norm(a_qk)
                        + std::norm(a_kp) + std::norm(a_kq);
        }
    }

    const double magnitude = std::abs(commutator);
    if (magnitude == 0) {
        // arg(c_pq) is undefined here; fall back to the identity.
        cs[0] = 1.0;
        cs[1] = 0.0;
        cs[2] = 0.0;
        return 0;
    }

    const cplx diag_gap = A[q + q*lda] - A[p + p*lda];   // d_pq

    // exp(i alpha) = -i * c_pq/|c_pq|, since c_pq/|c_pq| = exp(i arg(c_pq)).
    const cplx phase = cplx(0, -1)*(commutator/magnitude);

    // Dividing by the phase supplies the exp(-i alpha) factor.
    const cplx xi = phase*A[q + p*lda] + A[p + q*lda]/phase;

    const double denominator = 2*(std::norm(diag_gap) + std::norm(xi)) + off_weight;
    const double tanh_y = -magnitude / denominator;

    // From cosh^2 - sinh^2 = 1 and tanh = sinh/cosh.
    const double cosh_y = 1.0/std::sqrt(1.0 - tanh_y*tanh_y);
    const double sinh_y = cosh_y*tanh_y;

    cs[0] = cosh_y;
    cs[1] = cplx(0, sinh_y)/phase;
    cs[2] = cplx(0, -sinh_y)*phase;

    return 0;
}