Exemple #1
0
    Predictor(const Particle &p, const v2df ti)
	{
        const v4sf dt = __builtin_ia32_cvtpd2ps(ti - v2df(p.time));
		const v4sf s0 = __builtin_ia32_shufps(dt, dt, 0x00);
		const v4sf s1 = s0 + s0;
		const v4sf s2 = s0 * (v4sf)REP4(1.5f);

		this->posH = p.posH;
		this->posL = v4sf(p.posL) + s0*(v4sf(p.vel) + s0*(v4sf(p.acc2) + s0*(v4sf(p.jrk6))));
		this->vel = v4sf(p.vel) + s1*(v4sf(p.acc2) + s2*(v4sf(p.jrk6)));
	}
Exemple #2
0
	Predictor(
			const Particle &p, 
			const double ti)
	{
		const float dt = float(ti - p.time);
		const v4sf s0 = {dt, dt, dt, dt};
		const v4sf s1 = s0 + s0;
		const v4sf s2 = s0 * (v4sf){1.5f, 1.5f, 1.5f, 1.5f};

		posH = p.posH;
		posL = v4sf(p.posL) + s0*(v4sf(p.vel) + s0*(v4sf(p.acc2) + s0*(v4sf(p.jrk6))));
		vel = v4sf(p.vel) + s1*(v4sf(p.acc2) + s2*(v4sf(p.jrk6)));
	}
Exemple #3
0
void cnbint(
		int i,
		const double pos[][3],
		const double vel[][3],
		const double mass[],
		int nnb,
		int list[],
		double f[3],
		double fdot[3]){
	const int NBMAX = 512;
	assert(nnb <= NBMAX);

	float xbuf[NBMAX] __attribute__ ((aligned(16)));
	float ybuf[NBMAX] __attribute__ ((aligned(16)));
	float zbuf[NBMAX] __attribute__ ((aligned(16)));
	float vxbuf[NBMAX] __attribute__ ((aligned(16)));
	float vybuf[NBMAX] __attribute__ ((aligned(16)));
	float vzbuf[NBMAX] __attribute__ ((aligned(16)));
	float mbuf[NBMAX] __attribute__ ((aligned(16)));
	assert((unsigned long)xbuf % 16 == 0);

	double xi = pos[i][0];
	double yi = pos[i][1];
	double zi = pos[i][2];
	float vxi = vel[i][0];
	float vyi = vel[i][1];
	float vzi = vel[i][2];
	for(int k=0; k<nnb; k++){
		int j = list[k];
#if 1
		int jj = list[k+4];
		__builtin_prefetch(pos[jj]);
		__builtin_prefetch(pos[jj]+2);
		__builtin_prefetch(vel[jj]);
		__builtin_prefetch(vel[jj]+2);
		__builtin_prefetch(&mass[jj]);
#endif
#if 0
		xbuf[k] = pos[j][0] - xi;
		ybuf[k] = pos[j][1] - yi;
		zbuf[k] = pos[j][2] - zi;
		vxbuf[k] = vel[j][0] - vxi;
		vybuf[k] = vel[j][1] - vyi;
		vzbuf[k] = vel[j][2] - vzi;
		mbuf[k] = mass[j];
#else
		double xj = pos[j][0];
		double yj = pos[j][1];
		double zj = pos[j][2];
		float vxj = vel[j][0];
		float vyj = vel[j][1];
		float vzj = vel[j][2];
		float mj = mass[j];
		xj -= xi;
		yj -= yi;
		zj -= zi;
		vxj -= vxi;
		vyj -= vyi;
		vzj -= vzi;
		xbuf[k] = xj;
		ybuf[k] = yj;
		zbuf[k] = zj;
		vxbuf[k] = vxj;
		vybuf[k] = vyj;
		vzbuf[k] = vzj;
		mbuf[k] = mj;
#endif
	}
	for(int k=nnb; k%4; k++){
		xbuf[k] = 16.0f;
		ybuf[k] = 16.0f;
		zbuf[k] = 16.0f;
		vxbuf[k] = 0.0f;
		vybuf[k] = 0.0f;
		vzbuf[k] = 0.0f;
		mbuf[k] = 0.0f;
	}

	v4df ax(0.0), ay(0.0), az(0.0);
	v4sf jx(0.0), jy(0.0), jz(0.0);

	for(int k=0; k<nnb; k+=4){
		v4sf dx(xbuf + k);
		v4sf dy(ybuf + k);
		v4sf dz(zbuf + k);
		v4sf dvx(vxbuf + k);
		v4sf dvy(vybuf + k);
		v4sf dvz(vzbuf + k);
		v4sf mj(mbuf + k);

		v4sf r2 = dx*dx + dy*dy + dz*dz;
		v4sf rv = dx*dvx + dy*dvy + dz*dvz;
		rv *= v4sf(-3.0f);
		// v4sf rinv1 = r2.rsqrt() & v4sf(mask);
#if 1
		v4sf rinv1 = r2.rsqrt();
#else
		v4sf rinv1 = v4sf(v4df(1.0) / v4df(r2).sqrt());
#endif
		v4sf rinv2 = rinv1 * rinv1;
		rv *= rinv2;
		v4sf rinv3 = mj * rinv1 * rinv2;
		 
		dx *= rinv3; ax += v4df(dx);
		dy *= rinv3; ay += v4df(dy);
		dz *= rinv3; az += v4df(dz);
		dvx *= rinv3; jx += dvx;
		dvy *= rinv3; jy += dvy;
		dvz *= rinv3; jz += dvz;
		dx *= rv; jx += dx;
		dy *= rv; jy += dy;
		dz *= rv; jz += dz;
	}
	f[0] = ax.sum();
	f[1] = ay.sum();
	f[2] = az.sum();
	fdot[0] = (v4df(jx)).sum();
	fdot[1] = (v4df(jy)).sum();
	fdot[2] = (v4df(jz)).sum();
	assert(f[0] == f[0]);
	assert(f[1] == f[1]);
	assert(f[2] == f[2]);
	assert(fdot[0] == fdot[0]);
	assert(fdot[1] == fdot[1]);
	assert(fdot[2] == fdot[2]);
}
Exemple #4
0
void cnbint(
		int i,
		const double pos[][3],
		const double vel[][3],
		const double mass[],
		int nnb,
		int list[],
		double f[3],
		double fdot[3]){
	const int NBMAX = 512;
	assert(nnb <= NBMAX);

	double xbuf[NBMAX] __attribute__ ((aligned(16)));
	double ybuf[NBMAX] __attribute__ ((aligned(16)));
	double zbuf[NBMAX] __attribute__ ((aligned(16)));
	float vxbuf[NBMAX] __attribute__ ((aligned(16)));
	float vybuf[NBMAX] __attribute__ ((aligned(16)));
	float vzbuf[NBMAX] __attribute__ ((aligned(16)));
	float mbuf[NBMAX] __attribute__ ((aligned(16)));
	for(int k=0; k<nnb; k++){
		int j = list[k];
		xbuf[k] = pos[j][0];
		ybuf[k] = pos[j][1];
		zbuf[k] = pos[j][2];
		vxbuf[k] = vel[j][0];
		vybuf[k] = vel[j][1];
		vzbuf[k] = vel[j][2];
		mbuf[k] = mass[j];
	}
	for(int k=nnb; k%4; k++){
		xbuf[k] = 16.0;
		ybuf[k] = 16.0;
		zbuf[k] = 16.0;
		vxbuf[k] = 0.0f;
		vybuf[k] = 0.0f;
		vzbuf[k] = 0.0f;
		mbuf[k] = 0.0f;
	}

	v4df xi(pos[i][0]);
	v4df yi(pos[i][1]);
	v4df zi(pos[i][2]);
	v4sf vxi(vel[i][0]);
	v4sf vyi(vel[i][1]);
	v4sf vzi(vel[i][2]);
	v4df ax(0.0), ay(0.0), az(0.0);
	v4sf jx(0.0), jy(0.0), jz(0.0);

	for(int k=0; k<nnb; k+=4){
		v4df xj(xbuf + k);
		v4df yj(ybuf + k);
		v4df zj(zbuf + k);
		v4sf vxj(vxbuf + k);
		v4sf vyj(vybuf + k);
		v4sf vzj(vzbuf + k);
		v4sf mj(mbuf + k);

		v4sf dx = v4sf::_v4sf(xj - xi);
		v4sf dy = v4sf::_v4sf(yj - yi);
		v4sf dz = v4sf::_v4sf(zj - zi);
		v4sf dvx = vxj - vxi;
		v4sf dvy = vyj - vyi;
		v4sf dvz = vzj - vzi;

		v4sf r2 = dx*dx + dy*dy + dz*dz;
		v4sf rv = dx*dvx + dy*dvy + dz*dvz;
		rv *= v4sf(-3.0f);
		// v4sf rinv1 = r2.rsqrt() & v4sf(mask);
		v4sf rinv1 = r2.rsqrt();
		v4sf rinv2 = rinv1 * rinv1;
		rv *= rinv2;
		v4sf rinv3 = mj * rinv1 * rinv2;
		 
		dx *= rinv3; ax += v4df(dx);
		dy *= rinv3; ay += v4df(dy);
		dz *= rinv3; az += v4df(dz);
		dvx *= rinv3; jx += dvx;
		dvy *= rinv3; jy += dvy;
		dvz *= rinv3; jz += dvz;
		dx *= rv; jx += dx;
		dy *= rv; jy += dy;
		dz *= rv; jz += dz;
	}
	f[0] = ax.sum();
	f[1] = ay.sum();
	f[2] = az.sum();
	fdot[0] = (v4df(jx)).sum();
	fdot[1] = (v4df(jy)).sum();
	fdot[2] = (v4df(jz)).sum();
	assert(f[0] == f[0]);
	assert(f[1] == f[1]);
	assert(f[2] == f[2]);
	assert(fdot[0] == fdot[0]);
	assert(fdot[1] == fdot[1]);
	assert(fdot[2] == fdot[2]);
}
Exemple #5
0
 static inline v4sf rsqrt_1st(const v4sf rhs) {
     v4sf x0 = v4sf(_mm_rsqrt_ps(rhs.val));
     v4sf h  = v4sf(1.) - rhs * x0 * x0;
     return x0 + v4sf(0.5) * h * x0;
 }
Exemple #6
0
 static inline v4sf rsqrt_1st_phantom(const v4sf rhs) {
     v4sf x0 = v4sf(_mm_rsqrt_ps(rhs.val));
     return v4sf(x0 * (rhs * x0 * x0 - v4sf(3.)));
 }
Exemple #7
0
 static inline v4sf sqrt(const v4sf rhs) {
     return v4sf(_mm_sqrt_ps(rhs.val));
 }
Exemple #8
0
 static inline v4sf rsqrt_0th(const v4sf rhs) {
     return v4sf(_mm_rsqrt_ps(rhs.val));
 }
Exemple #9
0
 static inline v4sf rcp_1st(const v4sf rhs) {
     v4sf x0 = _mm_rcp_ps(rhs.val);
     v4sf h  = v4sf(1.) - rhs * x0;
     return x0 + h * x0;
 }
Exemple #10
0
 v4sf operator / (const v4sf rhs) const {
     return v4sf(_mm_div_ps(val, rhs.val));
 }
Exemple #11
0
 v4sf operator * (const v4sf rhs) const {
     return v4sf(val * rhs.val);
 }
Exemple #12
0
 v4sf operator - (const v4sf rhs) const {
     return v4sf(val - rhs.val);
 }
Exemple #13
0
 v4sf operator + (const v4sf rhs) const {
     return v4sf(val + rhs.val);
 }