Example #1
0
void exafmm_kernel::M2M(std::vector<real>& CiM, const std::vector<real>& CjM, const std::array<real, NDIM>& dist,
		const integer N) {
	std::vector<real> Ynm(FMM_P * FMM_P);
	std::vector<real> M_r(N), M_i(N);
	real rho, theta, phi;
	cart2sph(rho, theta, phi, dist);
	evalMultipole(rho, theta, -phi, Ynm);
	for (integer j = 0; j != FMM_P; ++j) {
		for (integer k = 0; k <= j; ++k) {
			const integer jkp = j * j + j + k;
			const integer jkm = j * j + j - k;
#pragma vector aligned
#pragma simd
			for (integer i = 0; i != N; ++i) {
				M_r[i] = M_i[i] = real(0.0);
			}
			for (integer n = 0; n <= j; ++n) {
				for (integer m = std::max(n - j + k, -n); m <= std::min(j - n + k, +n); ++m) {
					const integer nn = n * n + n;
					const integer nj = (j - n) * (j - n) + j - n;
					const integer jnkm = nj + k - m;
					const integer jnpkm = nj + std::abs(k - m);
					const integer jnmkm = nj - std::abs(k - m);
					const integer nmp = nn + std::abs(m);
					const integer nmm = nn - std::abs(m);
					const auto Mj_r = CjM.data() + N * jnpkm;
					const auto Mj_i = CjM.data() + N * jnmkm;
					const real tmp = Anm[nmp] * Anm[jnkm]
							/ Anm[jkp]* ODDEVEN((std::abs(k) - std::abs(m) - std::abs(k - m)) / 2 + n);
					const real sgn_km = SGN(k-m);
					const real Y_r = tmp * Ynm[nmp];
					const real Y_i = SGN(m) * tmp * Ynm[nmm];
#pragma vector aligned
#pragma simd
					for (integer i = 0; i != N; ++i) {
						COMPLEX_MULT_ADD(M_r[i], M_i[i], Y_r, Y_i, Mj_r[i], sgn_km * Mj_i[i]);
					}
				}
			}
			auto Mi_r = CiM.data() + N * jkp;
			auto Mi_i = CiM.data() + N * jkm;
#pragma vector aligned
#pragma simd
			for (integer i = 0; i != N; ++i) {
				Mi_r[i] += M_r[i];
				Mi_i[i] += (jkm == jkp) ? 0.0 : M_i[i];
			}
		}
	}
}
Example #2
0
void exafmm_kernel::L2L(std::vector<real>& CiL, const std::vector<real>& CjL, const std::array<real, NDIM>& dist,
		const integer N) {
	std::vector<real> Ynm(FMM_P * FMM_P);
	real rho, theta, phi;
	std::vector<real> L_r(N), L_i(N);
	cart2sph(rho, theta, phi, dist);
	evalMultipole(rho, theta, phi, Ynm);
	for (integer j = 0; j != FMM_P; ++j) {
		for (integer k = 0; k <= j; ++k) {
			integer jkp = j * j + j + k;
			integer jkm = j * j + j - k;
#pragma vector aligned
#pragma simd
			for (integer i = 0; i != N; ++i) {
				L_r[i] = L_i[i] = 0.0;
			}
			for (integer n = j; n != FMM_P; ++n) {
				for (integer m = j - n + k; m <= n - j + k; ++m) {
					const integer nn = n * n + n;
					const integer nj = (n - j) * ((n - j) + 1);
					const integer npm = nn + std::abs(m);
					const integer nmm = nn - std::abs(m);
					const integer jnpkm = nj + std::abs(m - k);
					const integer jnmkm = nj - std::abs(m - k);
					const auto Lj_r = CjL.data() + N * npm;
					const auto Lj_i = CjL.data() + N * nmm;
					const real sgn = SGN(m);
					real tmp = std::pow(-real(1.0), real(std::abs(m) - std::abs(k) - std::abs(m - k)) / 2) * Anm[jnpkm] * Anm[jkp]
							/ Anm[npm];
					const real Y_r = Ynm[jnpkm] * tmp;
					const real Y_i = SGN(m-k) * Ynm[jnmkm] * tmp;
#pragma vector aligned
#pragma simd
					for (integer i = 0; i != N; ++i) {
						COMPLEX_MULT_ADD(L_r[i], L_i[i], Y_r, Y_i, Lj_r[i], sgn * Lj_i[i]);
					}
				}
			}
			auto Li_r = CiL.data() + N * jkp;
			auto Li_i = CiL.data() + N * jkm;
#pragma vector aligned
#pragma simd
			for (integer i = 0; i != N; ++i) {
				Li_r[i] = L_r[i];
				Li_i[i] = (k == 0) ? L_r[i] : L_i[i];
			}
		}
	}
}
  /**
   * Create expansions for D_ij / G_i (Tornberg & Greengard
   */
  void P2M(const source_type& source, const charge_type& charge,
           const point_type& center, multipole_type& M) const {
    complex Ynm[4*P*P], YnmTheta[4*P*P];
    // modifications needed here
    point_type dist = static_cast<point_type>(source) - center;
    real rho, alpha, beta;
    cart2sph(rho,alpha,beta,dist);
    evalMultipole(rho,alpha,-beta,Ynm,YnmTheta);

    real g0 = charge[0], g1 = charge[1], g2 = charge[2];
    real n0 = charge[3], n1 = charge[4], n2 = charge[5];

    for (int n=0; n!=P; ++n) {
      for (int m=0; m<=n; ++m) {
        const int nm  = n * (n + 1) + m;
        const int nms = n * (n + 1) / 2 + m;

        complex brh = (double)n/rho*Ynm[nm]; // d(rho)
        complex bal = YnmTheta[nm];          // d(alpha)
        complex bbe = -complex(0,1.)*(double)m*Ynm[nm]; // d(beta)

        complex bxd = sin(alpha)*cos(beta)*brh + cos(alpha)*cos(beta)/rho*bal - sin(beta)/rho/sin(alpha)*bbe; // dx
        complex byd = sin(alpha)*sin(beta)*brh + cos(alpha)*sin(beta)/rho*bal + cos(beta)/rho/sin(alpha)*bbe; // dy
        complex bzd = cos(alpha)*brh - sin(alpha)/rho*bal; // dz

        // which order should these be in?
        real rdotn = bxd*n0 + byd*n1 + bzd*n2;
        real rdotg = bxd*g0 + byd*g1 + bzd*g2;
        M[0][nms] += (rdotn * g0 + rdotg * n0);
        M[1][nms] += (rdotn * g1 + rdotg * n1);
        M[2][nms] += (rdotn * g2 + rdotg * n2);

        real xdotg = source[0]*g0 + source[1]*g1 + source[2]*g2;
        real ndotx = n0*source[0] + n1*source[1] + n2*source[2];
        M[3][nms] += rdotn * xdotg + rdotg * ndotx;
      }
    }
  }
  /**
   * Create expansions for S_ij / F_i (Tornberg & Greengard
   */
  void P2M(const source_type& source, const charge_type& charge,
           const point_type& center, multipole_type& M) const {
    complex Ynm[4*P*P], YnmTheta[4*P*P];
    // modifications needed here
    point_type dist = static_cast<point_type>(source) - center;
    real rho, alpha, beta;
    cart2sph(rho,alpha,beta,dist);
    evalMultipole(rho,alpha,-beta,Ynm,YnmTheta);

    real f0 = charge[0], f1 = charge[1], f2 = charge[2];
    real fdotx = f0*source[0] + f1*source[1] + f2*source[2];

    for (int n=0; n!=P; ++n) {
      for (int m=0; m<=n; ++m) {
        const int nm  = n * (n + 1) + m;
        const int nms = n * (n + 1) / 2 + m;

        M[0][nms] += f0 * Ynm[nm];
        M[1][nms] += f1 * Ynm[nm];
        M[2][nms] += f2 * Ynm[nm];
        M[3][nms] += fdotx * Ynm[nm];
      }
    }
  }
 /** Kernel L2P operation
   * r += Op(L, t) where L is the local expansion and r is the result
   *
   * @param[in] L The local expansion
   * @param[in] center The center of the box with the local expansion
   * @param[in] target The target of this L2P operation
   * @param[in] result The result to accumulate into
   * @pre L includes the influence of all sources outside its box
   */
  void L2P(const local_type& L, const point_type& center,
           const target_type& target, result_type& result) const {
    complex Ynm[4*P*P], YnmTheta[4*P*P];
    point_type dist = target - center;
    point_type gradient[4]; //   = {0.,0.,0.,0.};
    gradient[0] = point_type(0.);
    gradient[1] = point_type(0.);
    gradient[2] = point_type(0.);
    gradient[3] = point_type(0.);
    point_type cartesian(0);

    real r, theta, phi;
    cart2sph(r,theta,phi,dist);
    evalMultipole(r,theta,phi,Ynm,YnmTheta);

#ifdef STRESSLET
    double scale = 1./6;
#else
    double scale = 1.;
#endif

    for( int n=0; n!=P; ++n ) {
      int nm  = n * n + n;
      int nms = n * (n + 1) / 2;
      result[0] += scale*std::real(L[0][nms] * Ynm[nm]);
      result[1] += scale*std::real(L[1][nms] * Ynm[nm]);
      result[2] += scale*std::real(L[2][nms] * Ynm[nm]);

      real factor = 1. / r * n;
      gradient[0][0] += std::real(L[0][nms] * Ynm[nm]) * factor;
      gradient[0][1] += std::real(L[0][nms] * YnmTheta[nm]);

      gradient[1][0] += std::real(L[1][nms] * Ynm[nm]) * factor;
      gradient[1][1] += std::real(L[1][nms] * YnmTheta[nm]);

      gradient[2][0] += std::real(L[2][nms] * Ynm[nm]) * factor;
      gradient[2][1] += std::real(L[2][nms] * YnmTheta[nm]);

      gradient[3][0] += std::real(L[3][nms] * Ynm[nm]) * factor;
      gradient[3][1] += std::real(L[3][nms] * YnmTheta[nm]);

      for( int m=1; m<=n; ++m ) {
        nm  = n * n + n + m;
        nms = n * (n + 1) / 2 + m;
        result[0] += scale * 2 * std::real(L[0][nms] * Ynm[nm]);
        result[1] += scale * 2 * std::real(L[1][nms] * Ynm[nm]);
        result[2] += scale * 2 * std::real(L[2][nms] * Ynm[nm]);

        gradient[0][0] += 2 * std::real(L[0][nms] * Ynm[nm]) * factor;
        gradient[0][1] += 2 * std::real(L[0][nms] * YnmTheta[nm]);
        gradient[0][2] += 2 * std::real(L[0][nms] * Ynm[nm] * CI) * m;

        gradient[1][0] += 2 * std::real(L[1][nms] * Ynm[nm]) * factor;
        gradient[1][1] += 2 * std::real(L[1][nms] * YnmTheta[nm]);
        gradient[1][2] += 2 * std::real(L[1][nms] * Ynm[nm] * CI) * m;

        gradient[2][0] += 2 * std::real(L[2][nms] * Ynm[nm]) * factor;
        gradient[2][1] += 2 * std::real(L[2][nms] * YnmTheta[nm]);
        gradient[2][2] += 2 * std::real(L[2][nms] * Ynm[nm] * CI) * m;

        gradient[3][0] += 2 * std::real(L[3][nms] * Ynm[nm]) * factor;
        gradient[3][1] += 2 * std::real(L[3][nms] * YnmTheta[nm]);
        gradient[3][2] += 2 * std::real(L[3][nms] * Ynm[nm] * CI) * m;
      }
    }
    sph2cart(r,theta,phi,gradient[0],cartesian);
    cartesian *= -target[0];
    gradient[0] = cartesian;

    sph2cart(r,theta,phi,gradient[1],cartesian);
    cartesian *= -target[1];
    gradient[1] = cartesian;

    sph2cart(r,theta,phi,gradient[2],cartesian);
    cartesian *= -target[2];
    gradient[2] = cartesian;

    sph2cart(r,theta,phi,gradient[3],cartesian);
    gradient[3] = cartesian;

    result[0] += scale*(gradient[0][0]+gradient[1][0]+gradient[2][0]+gradient[3][0]);
    result[1] += scale*(gradient[0][1]+gradient[1][1]+gradient[2][1]+gradient[3][1]);
    result[2] += scale*(gradient[0][2]+gradient[1][2]+gradient[2][2]+gradient[3][2]);
  }