static std::shared_ptr< backend::crs<Val, Col, Ptr> > interpolation( const AMatrix &A, const std::vector<Val> &Adia, const backend::crs<Val, Col, Ptr> &P_tent, std::vector<Val> &omega ) { const size_t n = rows(P_tent); const size_t nc = cols(P_tent); auto AP = product(A, P_tent, /*sort rows: */true); omega.resize(nc, math::zero<Val>()); std::vector<Val> denum(nc, math::zero<Val>()); #pragma omp parallel { std::vector<ptrdiff_t> marker(nc, -1); // Compute A * Dinv * AP row by row and compute columnwise // scalar products necessary for computation of omega. The // actual results of matrix-matrix product are not stored. std::vector<Col> adap_col(128); std::vector<Val> adap_val(128); #pragma omp for for(ptrdiff_t ia = 0; ia < static_cast<ptrdiff_t>(n); ++ia) { adap_col.clear(); adap_val.clear(); // Form current row of ADAP matrix. for(auto a = A.row_begin(ia); a; ++a) { Col ca = a.col(); Val va = math::inverse(Adia[ca]) * a.value(); for(auto p = AP->row_begin(ca); p; ++p) { Col c = p.col(); Val v = va * p.value(); if (marker[c] < 0) { marker[c] = adap_col.size(); adap_col.push_back(c); adap_val.push_back(v); } else { adap_val[marker[c]] += v; } } } amgcl::detail::sort_row( &adap_col[0], &adap_val[0], adap_col.size() ); // Update columnwise scalar products (AP,ADAP) and (ADAP,ADAP). // 1. (AP, ADAP) for( Ptr ja = AP->ptr[ia], ea = AP->ptr[ia + 1], jb = 0, eb = adap_col.size(); ja < ea && jb < eb; ) { Col ca = AP->col[ja]; Col cb = adap_col[jb]; if (ca < cb) ++ja; else if (cb < ca) ++jb; else /*ca == cb*/ { Val v = AP->val[ja] * adap_val[jb]; #pragma omp critical omega[ca] += v; ++ja; ++jb; } } // 2. (ADAP, ADAP) (and clear marker) for(size_t j = 0, e = adap_col.size(); j < e; ++j) { Col c = adap_col[j]; Val v = adap_val[j]; #pragma omp critical denum[c] += v * v; marker[c] = -1; } } } for(size_t i = 0, m = omega.size(); i < m; ++i) omega[i] = math::inverse(denum[i]) * omega[i]; // Update AP to obtain P: P = (P_tent - D^-1 A P Omega) /* * Here we use the fact that if P(i,j) != 0, * then with necessity AP(i,j) != 0: * * AP(i,j) = sum_k(A_ik P_kj), and A_ii != 0. */ #pragma omp parallel for for(ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) { Val dia = math::inverse(Adia[i]); for(Ptr ja = AP->ptr[i], ea = AP->ptr[i+1], jp = P_tent.ptr[i], ep = P_tent.ptr[i+1]; ja < ea; ++ja ) { Col ca = AP->col[ja]; Val va = -dia * AP->val[ja] * omega[ca]; for(; jp < ep; ++jp) { Col cp = P_tent.col[jp]; if (cp > ca) break; if (cp == ca) { va += P_tent.val[jp]; break; } } AP->val[ja] = va; } } return AP; }