コード例 #1
0
        /**
         * Builds the smoothed prolongation operator from a tentative one.
         *
         * Steps:
         *  1. AP = A * P_tent (rows of AP are requested sorted by column).
         *  2. Rows of ADAP = A * D^-1 * AP are formed one at a time (never
         *     stored as a matrix) and used to accumulate, for every column c,
         *       omega[c] = sum_i AP(i,c) * ADAP(i,c)
         *       denom[c] = sum_i ADAP(i,c) * ADAP(i,c)
         *     after which omega[c] = inverse(denom[c]) * omega[c].
         *  3. The values of AP are overwritten in place with
         *       P(i,c) = P_tent(i,c) - inverse(Adia[i]) * AP(i,c) * omega[c].
         *
         * \param A      Matrix traversed through row_begin() iterators.
         * \param Adia   Values whose inverses are used as D^-1; presumably the
         *               diagonal of A -- confirm against the caller.
         * \param P_tent Tentative prolongation in CRS form. The merge in
         *               step 3 relies on the column indices of each row being
         *               in ascending order.
         * \param omega  Output. Resized to cols(P_tent) and filled with the
         *               per-column damping factors; previous content is
         *               discarded.
         * \return The AP matrix, now holding the values of P.
         */
        static std::shared_ptr< backend::crs<Val, Col, Ptr> >
        interpolation(
                const AMatrix &A, const std::vector<Val> &Adia,
                const backend::crs<Val, Col, Ptr> &P_tent,
                std::vector<Val> &omega
                )
        {
            const size_t n  = rows(P_tent);
            const size_t nc = cols(P_tent);

            auto AP = product(A, P_tent, /*sort rows: */true);

            // omega is accumulated into below, so every entry has to start at
            // zero even if the caller passes a non-empty vector. resize()
            // would leave already existing entries untouched.
            omega.assign(nc, math::zero<Val>());
            std::vector<Val> denom(nc, math::zero<Val>());

#pragma omp parallel
            {
                // Per-thread scratch. marker[c] is the position of column c
                // in the current ADAP row, or -1 if the column is not there.
                std::vector<ptrdiff_t> marker(nc, -1);

                // Compute A * Dinv * AP row by row and compute columnwise
                // scalar products necessary for computation of omega. The
                // actual results of matrix-matrix product are not stored.
                std::vector<Col> adap_col;
                std::vector<Val> adap_val;
                adap_col.reserve(128);
                adap_val.reserve(128);

#pragma omp for
                for(ptrdiff_t ia = 0; ia < static_cast<ptrdiff_t>(n); ++ia) {
                    adap_col.clear();
                    adap_val.clear();

                    // Form current row of ADAP matrix.
                    for(auto a = A.row_begin(ia); a; ++a) {
                        Col ca  = a.col();
                        Val va  = math::inverse(Adia[ca]) * a.value();

                        for(auto p = AP->row_begin(ca); p; ++p) {
                            Col c = p.col();
                            Val v = va * p.value();

                            if (marker[c] < 0) {
                                marker[c] = static_cast<ptrdiff_t>(adap_col.size());
                                adap_col.push_back(c);
                                adap_val.push_back(v);
                            } else {
                                adap_val[marker[c]] += v;
                            }
                        }
                    }

                    // Sort the row by column so that it can be merged with
                    // the (sorted) row of AP. An empty row is skipped:
                    // indexing element 0 of an empty vector is undefined.
                    if (!adap_col.empty()) {
                        amgcl::detail::sort_row(
                                adap_col.data(), adap_val.data(), adap_col.size()
                                );
                    }

                    // Update columnwise scalar products (AP,ADAP) and (ADAP,ADAP).
                    // 1. (AP, ADAP): two-pointer merge over the sorted rows.
                    for(
                            Ptr ja = AP->ptr[ia], ea = AP->ptr[ia + 1],
                            jb = 0, eb = adap_col.size();
                            ja < ea && jb < eb;
                       )
                    {
                        Col ca = AP->col[ja];
                        Col cb = adap_col[jb];

                        if (ca < cb)
                            ++ja;
                        else if (cb < ca)
                            ++jb;
                        else /*ca == cb*/ {
                            Val v = AP->val[ja] * adap_val[jb];
                            // omega is shared between threads; only the
                            // accumulation itself is serialized.
#pragma omp critical
                            omega[ca] += v;
                            ++ja;
                            ++jb;
                        }
                    }

                    // 2. (ADAP, ADAP) (and clear marker)
                    for(size_t j = 0, e = adap_col.size(); j < e; ++j) {
                        Col c = adap_col[j];
                        Val v = adap_val[j];
                        // denom is shared between threads as well.
#pragma omp critical
                        denom[c] += v * v;
                        // marker is thread-private; reset it for the next row.
                        marker[c] = -1;
                    }
                }
            }

            // NOTE(review): a column that never appears in ADAP leaves
            // denom[i] at zero; the result then depends on how math::inverse
            // treats a zero argument -- confirm this cannot happen.
            for(size_t i = 0, m = omega.size(); i < m; ++i)
                omega[i] = math::inverse(denom[i]) * omega[i];

            // Update AP to obtain P: P = (P_tent - D^-1 A P Omega)
            /*
             * Here we use the fact that if P(i,j) != 0,
             * then necessarily AP(i,j) != 0:
             *
             * AP(i,j) = sum_k(A_ik P_kj), and A_ii != 0.
             *
             * So it is enough to walk the nonzeros of AP and pick up the
             * matching entry of P_tent (if any) on the way.
             */
#pragma omp parallel for
            for(ptrdiff_t i = 0; i < static_cast<ptrdiff_t>(n); ++i) {
                Val dia = math::inverse(Adia[i]);

                for(Ptr ja = AP->ptr[i],    ea = AP->ptr[i+1],
                        jp = P_tent.ptr[i], ep = P_tent.ptr[i+1];
                        ja < ea; ++ja
                   )
                {
                    Col ca = AP->col[ja];
                    Val va = -dia * AP->val[ja] * omega[ca];

                    // Advance through the row of P_tent until its column
                    // reaches ca. jp is kept between iterations of the outer
                    // loop, so each row of P_tent is scanned only once.
                    for(; jp < ep; ++jp) {
                        Col cp = P_tent.col[jp];
                        if (cp > ca)
                            break;

                        if (cp == ca) {
                            va += P_tent.val[jp];
                            break;
                        }
                    }

                    AP->val[ja] = va;
                }
            }

            return AP;
        }