void exafmm_kernel::M2M(std::vector<real>& CiM, const std::vector<real>& CjM, const std::array<real, NDIM>& dist, const integer N) { std::vector<real> Ynm(FMM_P * FMM_P); std::vector<real> M_r(N), M_i(N); real rho, theta, phi; cart2sph(rho, theta, phi, dist); evalMultipole(rho, theta, -phi, Ynm); for (integer j = 0; j != FMM_P; ++j) { for (integer k = 0; k <= j; ++k) { const integer jkp = j * j + j + k; const integer jkm = j * j + j - k; #pragma vector aligned #pragma simd for (integer i = 0; i != N; ++i) { M_r[i] = M_i[i] = real(0.0); } for (integer n = 0; n <= j; ++n) { for (integer m = std::max(n - j + k, -n); m <= std::min(j - n + k, +n); ++m) { const integer nn = n * n + n; const integer nj = (j - n) * (j - n) + j - n; const integer jnkm = nj + k - m; const integer jnpkm = nj + std::abs(k - m); const integer jnmkm = nj - std::abs(k - m); const integer nmp = nn + std::abs(m); const integer nmm = nn - std::abs(m); const auto Mj_r = CjM.data() + N * jnpkm; const auto Mj_i = CjM.data() + N * jnmkm; const real tmp = Anm[nmp] * Anm[jnkm] / Anm[jkp]* ODDEVEN((std::abs(k) - std::abs(m) - std::abs(k - m)) / 2 + n); const real sgn_km = SGN(k-m); const real Y_r = tmp * Ynm[nmp]; const real Y_i = SGN(m) * tmp * Ynm[nmm]; #pragma vector aligned #pragma simd for (integer i = 0; i != N; ++i) { COMPLEX_MULT_ADD(M_r[i], M_i[i], Y_r, Y_i, Mj_r[i], sgn_km * Mj_i[i]); } } } auto Mi_r = CiM.data() + N * jkp; auto Mi_i = CiM.data() + N * jkm; #pragma vector aligned #pragma simd for (integer i = 0; i != N; ++i) { Mi_r[i] += M_r[i]; Mi_i[i] += (jkm == jkp) ? 0.0 : M_i[i]; } } } }
void exafmm_kernel::L2L(std::vector<real>& CiL, const std::vector<real>& CjL, const std::array<real, NDIM>& dist, const integer N) { std::vector<real> Ynm(FMM_P * FMM_P); real rho, theta, phi; std::vector<real> L_r(N), L_i(N); cart2sph(rho, theta, phi, dist); evalMultipole(rho, theta, phi, Ynm); for (integer j = 0; j != FMM_P; ++j) { for (integer k = 0; k <= j; ++k) { integer jkp = j * j + j + k; integer jkm = j * j + j - k; #pragma vector aligned #pragma simd for (integer i = 0; i != N; ++i) { L_r[i] = L_i[i] = 0.0; } for (integer n = j; n != FMM_P; ++n) { for (integer m = j - n + k; m <= n - j + k; ++m) { const integer nn = n * n + n; const integer nj = (n - j) * ((n - j) + 1); const integer npm = nn + std::abs(m); const integer nmm = nn - std::abs(m); const integer jnpkm = nj + std::abs(m - k); const integer jnmkm = nj - std::abs(m - k); const auto Lj_r = CjL.data() + N * npm; const auto Lj_i = CjL.data() + N * nmm; const real sgn = SGN(m); real tmp = std::pow(-real(1.0), real(std::abs(m) - std::abs(k) - std::abs(m - k)) / 2) * Anm[jnpkm] * Anm[jkp] / Anm[npm]; const real Y_r = Ynm[jnpkm] * tmp; const real Y_i = SGN(m-k) * Ynm[jnmkm] * tmp; #pragma vector aligned #pragma simd for (integer i = 0; i != N; ++i) { COMPLEX_MULT_ADD(L_r[i], L_i[i], Y_r, Y_i, Lj_r[i], sgn * Lj_i[i]); } } } auto Li_r = CiL.data() + N * jkp; auto Li_i = CiL.data() + N * jkm; #pragma vector aligned #pragma simd for (integer i = 0; i != N; ++i) { Li_r[i] = L_r[i]; Li_i[i] = (k == 0) ? L_r[i] : L_i[i]; } } } }
/** * Create expansions for D_ij / G_i (Tornberg & Greengard */ void P2M(const source_type& source, const charge_type& charge, const point_type& center, multipole_type& M) const { complex Ynm[4*P*P], YnmTheta[4*P*P]; // modifications needed here point_type dist = static_cast<point_type>(source) - center; real rho, alpha, beta; cart2sph(rho,alpha,beta,dist); evalMultipole(rho,alpha,-beta,Ynm,YnmTheta); real g0 = charge[0], g1 = charge[1], g2 = charge[2]; real n0 = charge[3], n1 = charge[4], n2 = charge[5]; for (int n=0; n!=P; ++n) { for (int m=0; m<=n; ++m) { const int nm = n * (n + 1) + m; const int nms = n * (n + 1) / 2 + m; complex brh = (double)n/rho*Ynm[nm]; // d(rho) complex bal = YnmTheta[nm]; // d(alpha) complex bbe = -complex(0,1.)*(double)m*Ynm[nm]; // d(beta) complex bxd = sin(alpha)*cos(beta)*brh + cos(alpha)*cos(beta)/rho*bal - sin(beta)/rho/sin(alpha)*bbe; // dx complex byd = sin(alpha)*sin(beta)*brh + cos(alpha)*sin(beta)/rho*bal + cos(beta)/rho/sin(alpha)*bbe; // dy complex bzd = cos(alpha)*brh - sin(alpha)/rho*bal; // dz // which order should these be in? real rdotn = bxd*n0 + byd*n1 + bzd*n2; real rdotg = bxd*g0 + byd*g1 + bzd*g2; M[0][nms] += (rdotn * g0 + rdotg * n0); M[1][nms] += (rdotn * g1 + rdotg * n1); M[2][nms] += (rdotn * g2 + rdotg * n2); real xdotg = source[0]*g0 + source[1]*g1 + source[2]*g2; real ndotx = n0*source[0] + n1*source[1] + n2*source[2]; M[3][nms] += rdotn * xdotg + rdotg * ndotx; } } }
/** * Create expansions for S_ij / F_i (Tornberg & Greengard */ void P2M(const source_type& source, const charge_type& charge, const point_type& center, multipole_type& M) const { complex Ynm[4*P*P], YnmTheta[4*P*P]; // modifications needed here point_type dist = static_cast<point_type>(source) - center; real rho, alpha, beta; cart2sph(rho,alpha,beta,dist); evalMultipole(rho,alpha,-beta,Ynm,YnmTheta); real f0 = charge[0], f1 = charge[1], f2 = charge[2]; real fdotx = f0*source[0] + f1*source[1] + f2*source[2]; for (int n=0; n!=P; ++n) { for (int m=0; m<=n; ++m) { const int nm = n * (n + 1) + m; const int nms = n * (n + 1) / 2 + m; M[0][nms] += f0 * Ynm[nm]; M[1][nms] += f1 * Ynm[nm]; M[2][nms] += f2 * Ynm[nm]; M[3][nms] += fdotx * Ynm[nm]; } } }
/** Kernel L2P operation * r += Op(L, t) where L is the local expansion and r is the result * * @param[in] L The local expansion * @param[in] center The center of the box with the local expansion * @param[in] target The target of this L2P operation * @param[in] result The result to accumulate into * @pre L includes the influence of all sources outside its box */ void L2P(const local_type& L, const point_type& center, const target_type& target, result_type& result) const { complex Ynm[4*P*P], YnmTheta[4*P*P]; point_type dist = target - center; point_type gradient[4]; // = {0.,0.,0.,0.}; gradient[0] = point_type(0.); gradient[1] = point_type(0.); gradient[2] = point_type(0.); gradient[3] = point_type(0.); point_type cartesian(0); real r, theta, phi; cart2sph(r,theta,phi,dist); evalMultipole(r,theta,phi,Ynm,YnmTheta); #ifdef STRESSLET double scale = 1./6; #else double scale = 1.; #endif for( int n=0; n!=P; ++n ) { int nm = n * n + n; int nms = n * (n + 1) / 2; result[0] += scale*std::real(L[0][nms] * Ynm[nm]); result[1] += scale*std::real(L[1][nms] * Ynm[nm]); result[2] += scale*std::real(L[2][nms] * Ynm[nm]); real factor = 1. / r * n; gradient[0][0] += std::real(L[0][nms] * Ynm[nm]) * factor; gradient[0][1] += std::real(L[0][nms] * YnmTheta[nm]); gradient[1][0] += std::real(L[1][nms] * Ynm[nm]) * factor; gradient[1][1] += std::real(L[1][nms] * YnmTheta[nm]); gradient[2][0] += std::real(L[2][nms] * Ynm[nm]) * factor; gradient[2][1] += std::real(L[2][nms] * YnmTheta[nm]); gradient[3][0] += std::real(L[3][nms] * Ynm[nm]) * factor; gradient[3][1] += std::real(L[3][nms] * YnmTheta[nm]); for( int m=1; m<=n; ++m ) { nm = n * n + n + m; nms = n * (n + 1) / 2 + m; result[0] += scale * 2 * std::real(L[0][nms] * Ynm[nm]); result[1] += scale * 2 * std::real(L[1][nms] * Ynm[nm]); result[2] += scale * 2 * std::real(L[2][nms] * Ynm[nm]); gradient[0][0] += 2 * std::real(L[0][nms] * Ynm[nm]) * factor; gradient[0][1] += 2 * std::real(L[0][nms] * YnmTheta[nm]); gradient[0][2] += 2 * std::real(L[0][nms] * Ynm[nm] * CI) * m; gradient[1][0] += 2 * std::real(L[1][nms] * Ynm[nm]) * factor; gradient[1][1] += 2 * std::real(L[1][nms] * YnmTheta[nm]); gradient[1][2] += 2 * std::real(L[1][nms] * Ynm[nm] * CI) * m; gradient[2][0] += 2 * std::real(L[2][nms] * Ynm[nm]) * factor; gradient[2][1] += 2 * std::real(L[2][nms] * YnmTheta[nm]); gradient[2][2] += 2 * std::real(L[2][nms] * Ynm[nm] * CI) * m; gradient[3][0] += 2 * std::real(L[3][nms] * Ynm[nm]) * factor; gradient[3][1] += 2 * std::real(L[3][nms] * YnmTheta[nm]); gradient[3][2] += 2 * std::real(L[3][nms] * Ynm[nm] * CI) * m; } } sph2cart(r,theta,phi,gradient[0],cartesian); cartesian *= -target[0]; gradient[0] = cartesian; sph2cart(r,theta,phi,gradient[1],cartesian); cartesian *= -target[1]; gradient[1] = cartesian; sph2cart(r,theta,phi,gradient[2],cartesian); cartesian *= -target[2]; gradient[2] = cartesian; sph2cart(r,theta,phi,gradient[3],cartesian); gradient[3] = cartesian; result[0] += scale*(gradient[0][0]+gradient[1][0]+gradient[2][0]+gradient[3][0]); result[1] += scale*(gradient[0][1]+gradient[1][1]+gradient[2][1]+gradient[3][1]); result[2] += scale*(gradient[0][2]+gradient[1][2]+gradient[2][2]+gradient[3][2]); }