Exemplo n.º 1
0
  void calc_and_accum(const Pred4 &pi, const Pred4 &pj, const v4sf idx){
		const v4sf dx = (pj.xH - pi.xH) + (pj.xL - pi.xL);
		const v4sf dy = (pj.yH - pi.yH) + (pj.yL - pi.yL);
		const v4sf dz = (pj.zH - pi.zH) + (pj.zL - pi.zL);

		const v4sf dvx = pj.vx - pi.vx;
		const v4sf dvy = pj.vy - pi.vy;
		const v4sf dvz = pj.vz - pi.vz;

		const v4sf r2 = dx*dx  + dy*dy  + dz*dz;
		const v4sf rv = dx*dvx + dy*dvy + dz*dvz;
		const v4sf rinv   = rsqrt_NR(r2);
		const v4sf rinv2  = rinv * rinv;
		const v4sf c1     = REP4(-3.0f);
		const v4sf alpha  = c1 * rinv2 * rv;
		const v4sf mrinv3 = pj.mass * rinv * rinv2;

        // idx     = [i1|i2|i3|i4]; r2      = [r1|r2|r3|r4]
        // r2_idx0 = [i1|r1|i2|r2]; r2_idx1 = [i3|r3|i4|r4]
        const v4sf r2_idx0 = __builtin_ia32_unpcklps(idx, r2);
        const v4sf r2_idx1 = __builtin_ia32_unpckhps(idx, r2);
        // Find minimum distance
        vnnb = (v4sf)__builtin_ia32_minpd((v2df)vnnb, (v2df)r2_idx0);
        vnnb = (v4sf)__builtin_ia32_minpd((v2df)vnnb, (v2df)r2_idx1);

		ax += mrinv3 * dx;
		ay += mrinv3 * dy;
		az += mrinv3 * dz;

		jx += mrinv3 * (dvx + alpha * dx); 
		jy += mrinv3 * (dvy + alpha * dy); 
		jz += mrinv3 * (dvz + alpha * dz); 
	}
Exemplo n.º 2
0
	void clear(){
		const v4sf zero = REP4(0.0f);
		ax = ay = az = zero;
		jx = jy = jz = zero;
        const double DHUGE = std::numeric_limits<double>::max();
        v2df tmp = {DHUGE,DHUGE};
        vnnb = (v4sf)tmp;
	}
Exemplo n.º 3
0
    Predictor(const Particle &p, const v2df ti)
	{
        const v4sf dt = __builtin_ia32_cvtpd2ps(ti - v2df(p.time));
		const v4sf s0 = __builtin_ia32_shufps(dt, dt, 0x00);
		const v4sf s1 = s0 + s0;
		const v4sf s2 = s0 * (v4sf)REP4(1.5f);

		this->posH = p.posH;
		this->posL = v4sf(p.posL) + s0*(v4sf(p.vel) + s0*(v4sf(p.acc2) + s0*(v4sf(p.jrk6))));
		this->vel = v4sf(p.vel) + s1*(v4sf(p.acc2) + s2*(v4sf(p.jrk6)));
	}
Exemplo n.º 4
0
// Stores the neighbour list of entry `addr`.
//
//   addr   : index into the global `list` array
//   nnb    : number of valid entries in nblist (asserted <= NBlist::NB_MAX)
//   nblist : neighbour indices; each one is stored decremented by 1
//
// After the nnb converted indices the destination is padded with ::nmax up to
// (but not including) index 4 + 4*(1 + (nnb-1)/4), i.e. the list is rounded up
// to a multiple of four and followed by one extra group of four dummies.
static void irr_simd_set_list(
		const int addr,
		const int nnb,
		const int nblist[])
{
	assert(nnb <= NBlist::NB_MAX);
	list[addr].nnb = nnb;

	const int *src = nblist;
	      int *dst = list[addr].nb;

	typedef int       v4si __attribute__((vector_size(16)));
	typedef long long v2di __attribute__ ((__vector_size__ (16)));
	const v4si one = REP4(1);

	// Complete groups of four: unaligned load, subtract 1, non-temporal store.
	// movntdq needs a 16-byte aligned destination.
	// assert((unsigned long)dst %16 == 0);
	int k = 0;
	for(; k+4 <= nnb; k+=4){
		const v4si idx0 = (v4si)__builtin_ia32_loaddqu((const char *)(src+k));
		__builtin_ia32_movntdq((v2di *)(dst+k), (v2di)(idx0-one));
	}

	// Remaining 0..3 entries are converted one by one so that nothing beyond
	// nblist[nnb-1] is ever read. (A full 4-wide load here would touch up to
	// three ints past the end of the caller's array.)
	for(; k<nnb; k++){
		dst[k] = src[k] - 1;
	}

	// fill dummy
	const int nmax = ::nmax;
	const int kmax = 4 + 4 * (1 + (nnb-1)/4);
	for(int kd=nnb; kd<kmax; kd++){
		dst[kd] = nmax;
	}

}
Exemplo n.º 5
0
/*
 * More thorough test of concatenation options.
 *
 * Walks a single git_buf through free / putc / put / sets / puts / clear and
 * finally git_buf_copy_cstr, checking the string content (and, where stated,
 * size / asize) after each step. The steps build on each other, so their
 * order matters.
 */
void test_core_buffer__2(void)
{
	git_buf buf = GIT_BUF_INIT;
	int i;
	char data[128];

	cl_assert(buf.size == 0);

	/* this must be safe to do */
	git_buf_free(&buf);
	cl_assert(buf.size == 0);
	cl_assert(buf.asize == 0);

	/* empty buffer should be empty string */
	cl_assert_equal_s("", git_buf_cstr(&buf));
	cl_assert(buf.size == 0);
	/* cl_assert(buf.asize == 0); -- should not assume what git_buf does */

	/* free should set us back to the beginning */
	git_buf_free(&buf);
	cl_assert(buf.size == 0);
	cl_assert(buf.asize == 0);

	/* add letter */
	git_buf_putc(&buf, '+');
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("+", git_buf_cstr(&buf));

	/* add letter again */
	git_buf_putc(&buf, '+');
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("++", git_buf_cstr(&buf));

	/* let's try that a few times (2 existing + 16 more characters) */
	for (i = 0; i < 16; ++i) {
		git_buf_putc(&buf, '+');
		cl_assert(git_buf_oom(&buf) == 0);
	}
	cl_assert_equal_s("++++++++++++++++++", git_buf_cstr(&buf));

	git_buf_free(&buf);

	/* add data */
	git_buf_put(&buf, "xo", 2);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("xo", git_buf_cstr(&buf));

	/* add data again */
	git_buf_put(&buf, "xo", 2);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("xoxo", git_buf_cstr(&buf));

	/* let's try that a few times (2 existing + 16 more "xo" pairs) */
	for (i = 0; i < 16; ++i) {
		git_buf_put(&buf, "xo", 2);
		cl_assert(git_buf_oom(&buf) == 0);
	}
	cl_assert_equal_s("xoxoxoxoxoxoxoxoxoxoxoxoxoxoxoxoxoxo",
					   git_buf_cstr(&buf));

	git_buf_free(&buf);

	/* set to string */
	git_buf_sets(&buf, test_string);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s(test_string, git_buf_cstr(&buf));

	/* append string */
	git_buf_puts(&buf, test_string);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s(test_string_x2, git_buf_cstr(&buf));

	/* set to string again (should overwrite - not append) */
	git_buf_sets(&buf, test_string);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s(test_string, git_buf_cstr(&buf));

	/* test clear */
	git_buf_clear(&buf);
	cl_assert_equal_s("", git_buf_cstr(&buf));

	git_buf_free(&buf);

	/* test extracting data into buffer */
	git_buf_puts(&buf, REP4("0123456789"));
	cl_assert(git_buf_oom(&buf) == 0);

	/* destination large enough: the whole 40-character string is copied */
	git_buf_copy_cstr(data, sizeof(data), &buf);
	cl_assert_equal_s(REP4("0123456789"), data);
	/* smaller destination sizes: output is cut to (size - 1) characters */
	git_buf_copy_cstr(data, 11, &buf);
	cl_assert_equal_s("0123456789", data);
	git_buf_copy_cstr(data, 3, &buf);
	cl_assert_equal_s("01", data);
	git_buf_copy_cstr(data, 1, &buf);
	cl_assert_equal_s("", data);

	/* the truncated copies above must not have altered buf itself */
	git_buf_copy_cstr(data, sizeof(data), &buf);
	cl_assert_equal_s(REP4("0123456789"), data);

	git_buf_sets(&buf, REP256("x"));
	git_buf_copy_cstr(data, sizeof(data), &buf);
	/* since sizeof(data) == 128, only 127 bytes should be copied */
	/* expected literal: 64 + 16 + 16 + 16 + 15 = 127 'x' characters */
	cl_assert_equal_s(REP4(REP16("x")) REP16("x") REP16("x")
					   REP16("x") "xxxxxxxxxxxxxxx", data);

	git_buf_free(&buf);

	/* copying out of a freed buffer yields an empty string */
	git_buf_copy_cstr(data, sizeof(data), &buf);
	cl_assert_equal_s("", data);
}
Exemplo n.º 6
0
	Particle(int) // constructor for a dummy particle
	{
		posH = (v4sf){255.0f, 255.0f, 255.0f, 0.0f};
		posL = vel = acc2 = jrk6 = (v4sf)REP4(0.0f);
		time = (v2df){0.0, 0.0};
	}
Exemplo n.º 7
0
// Reciprocal square root of each lane of x: takes the rsqrtps estimate y and
// applies one Newton-Raphson correction, written as
//   (-0.5 * y) * (x*y*y - 3)   ==   0.5 * y * (3 - x*y*y).
static inline v4sf rsqrt_NR(const v4sf x){
	const v4sf minus_half  = REP4(-0.5f);
	const v4sf minus_three = REP4(-3.0f);

	const v4sf est      = __builtin_ia32_rsqrtps(x);  // hardware estimate
	const v4sf scaled   = minus_half * est;
	const v4sf residual = x*est*est + minus_three;
	return scaled * residual;
}
Exemplo n.º 8
0
// Renders the preview geometry for the viewed material: a textured box with
// half-size 100, drawn as 6 quads through client-side vertex arrays.
//
// - Returns without drawing when IsTexture && ShowChannels.
// - For non-texture objects, enables GL lighting and positions GL_LIGHT0 from
//   a fixed origin passed through viewAxis.UnTransformVector.
// - Binds the material via UUnrealMaterial::SetMaterial(); if a shader is
//   current (GCurrentShader), feeds its "normal" and "tangent" attributes.
// - Restores the default material at the end (BindDefaultMaterial(true)).
//
// TimeDelta is not used by this function.
void CMaterialViewer::Draw3D(float TimeDelta)
{
	if (IsTexture && ShowChannels) return;

	// fixed light origin, converted with the current view axis
	static const CVec3 origin = { -150, 100, 100 };
//	static const CVec3 origin = { -150, 50, 50 };
	CVec3 lightPosV;
	viewAxis.UnTransformVector(origin, lightPosV);

#if 0
	// show light source
	glDisable(GL_LIGHTING);
	BindDefaultMaterial(true);
	glBegin(GL_LINES);
	glColor3f(1, 0, 0);
	CVec3 tmp;
	tmp = lightPosV;
	tmp[0] -= 20; glVertex3fv(tmp.v); tmp[0] += 40; glVertex3fv(tmp.v);
	tmp = lightPosV;
	tmp[1] -= 20; glVertex3fv(tmp.v); tmp[1] += 40; glVertex3fv(tmp.v);
	tmp = lightPosV;
	tmp[2] -= 20; glVertex3fv(tmp.v); tmp[2] += 40; glVertex3fv(tmp.v);
	glEnd();
#endif

	glColor3f(1, 1, 1);

	if (!IsTexture)
	{
		glEnable(GL_LIGHTING);	// no lighting for textures
		// GL_POSITION takes 4 floats; the 4th (w) component is set to 0 here
		float lightPos[4];
		lightPos[0] = lightPosV[0];
		lightPos[1] = lightPosV[1];
		lightPos[2] = lightPosV[2];
		lightPos[3] = 0;
		glLightfv(GL_LIGHT0, GL_POSITION, lightPos);
//		glMaterialf(GL_FRONT, GL_SHININESS, 20);
	}

	// bind material
	UUnrealMaterial *Mat = static_cast<UUnrealMaterial*>(Object);
	Mat->SetMaterial();

	// check tangent space
	// attribute locations stay -1 (= "not available") when no shader is
	// current; every later use is guarded by a ">= 0" test
	GLint aNormal = -1;
	GLint aTangent = -1;
//	GLint aBinormal = -1;
	const CShader *Sh = GCurrentShader;
	if (Sh)
	{
		aNormal    = Sh->GetAttrib("normal");
		aTangent   = Sh->GetAttrib("tangent");
//		aBinormal  = Sh->GetAttrib("binormal");
	}

	// and draw box ...
	// A is the box half-size; Vxyz names a corner, each digit selecting -A (0)
	// or +A (1) on that axis. Note: A is #undef'd inside the initializer
	// below, the Vxyz macros themselves are not #undef'd in this function.
#define A 100
// vertex
#define V000 {-A, -A, -A}
#define V001 {-A, -A,  A}
#define V010 {-A,  A, -A}
#define V011 {-A,  A,  A}
#define V100 { A, -A, -A}
#define V101 { A, -A,  A}
#define V110 { A,  A, -A}
#define V111 { A,  A,  A}
	// 6 faces x 4 corners = 24 vertices
	static const CVec3 box[] =
	{
		V001, V000, V010, V011,		// near   (x=-A)
		V111, V110,	V100, V101,		// far    (x=+A)
		V101, V100, V000, V001,		// left   (y=-A)
		V011, V010, V110, V111,		// right  (y=+A)
		V010, V000, V100, V110,		// bottom (z=-A)
		V001, V011, V111, V101,		// top    (z=+A)
#undef A
	};
	// REP4 repeats one braced initializer 4 times - one per corner of a face
#define REP4(...)	{__VA_ARGS__},{__VA_ARGS__},{__VA_ARGS__},{__VA_ARGS__}
	// per-vertex normals, same face order as box[]; 4th component is 1 for
	// every face (see the "decode binormal in shader" note further down)
	static const CVec4 normal[] =
	{
		REP4(-1, 0, 0, 1 ),
		REP4( 1, 0, 0, 1 ),
		REP4( 0,-1, 0, 1 ),
		REP4( 0, 1, 0, 1 ),
		REP4( 0, 0,-1, 1 ),
		REP4( 0, 0, 1, 1 )
	};
	// per-vertex tangents, same face order as box[]
	static const CVec3 tangent[] =
	{
		REP4( 0,-1, 0 ),
		REP4( 0, 1, 0 ),
		REP4( 1, 0, 0 ),
		REP4(-1, 0, 0 ),
		REP4( 1, 0, 0 ),
		REP4(-1, 0, 0 )
	};
//	static const CVec3 binormal[] =
//	{
//		REP4( 0, 0, 1 ),
//		REP4( 0, 0, 1 ),
//		REP4( 0, 0, 1 ),
//		REP4( 0, 0, 1 ),
//		REP4( 0,-1, 0 ),
//		REP4( 0,-1, 0 )
//	};
#undef REP4
	// texture coordinates: the same 4 corners repeated for every face
	static const float tex[][2] =
	{
		{0, 0}, {0, 1}, {1, 1}, {1, 0},
		{0, 0}, {0, 1}, {1, 1}, {1, 0},
		{0, 0}, {0, 1}, {1, 1}, {1, 0},
		{0, 0}, {0, 1}, {1, 1}, {1, 0},
		{0, 0}, {0, 1}, {1, 1}, {1, 0},
		{0, 0}, {0, 1}, {1, 1}, {1, 0}
	};
	// identity index list (0..23): each run of 4 indices is one quad
	static const int inds[] =
	{
		 0, 1, 2, 3,
		 4, 5, 6, 7,
		 8, 9,10,11,
		12,13,14,15,
		16,17,18,19,
		20,21,22,23
	};

#if 0
	// verify tangents, should be suitable for binormal computation in shaders
	// (note: we're not verifying correspondence with UV coordinates)
	// NOTE: this disabled block reads binormal[], which is commented out above
	for (int i = 0; i < 24; i++)
	{
		CVec4 n4 = normal[i];
		CVec3 n = n4.ToVec3();
		CVec3 t = tangent[i];
		CVec3 b = binormal[i];
		CVec3 b2;
		cross(n, t, b2);
		VectorScale(b2, n4[3], b2);
		float dd = VectorDistance(b2, b);
		if (dd > 0.001f) appPrintf("dist[%d] = %g\n", i, dd);
	}
#endif

	// fixed-function client arrays
	glEnableClientState(GL_VERTEX_ARRAY);
	glEnableClientState(GL_TEXTURE_COORD_ARRAY);
	glEnableClientState(GL_NORMAL_ARRAY);

	glVertexPointer(3, GL_FLOAT, sizeof(CVec3), box);
	// stride is sizeof(CVec4), so the 4th component of each normal is skipped
	glNormalPointer(GL_FLOAT, sizeof(CVec4), normal);
	glTexCoordPointer(2, GL_FLOAT, 0, tex);

	// generic shader attributes, only when the current shader exposes them
	if (aNormal >= 0)
	{
		glEnableVertexAttribArray(aNormal);
		// send 4 components to decode binormal in shader
		glVertexAttribPointer(aNormal, 4, GL_FLOAT, GL_FALSE, sizeof(CVec4), normal);
	}
	if (aTangent >= 0)
	{
		glEnableVertexAttribArray(aTangent);
		glVertexAttribPointer(aTangent,  3, GL_FLOAT, GL_FALSE, sizeof(CVec3), tangent);
	}
//	if (aBinormal >= 0)
//	{
//		glEnableVertexAttribArray(aBinormal);
//		glVertexAttribPointer(aBinormal, 3, GL_FLOAT, GL_FALSE, sizeof(CVec3), binormal);
//	}

	glDrawElements(GL_QUADS, ARRAY_COUNT(inds), GL_UNSIGNED_INT, inds);

	// undo every array/attribute enable made above
	glDisableClientState(GL_VERTEX_ARRAY);
	glDisableClientState(GL_TEXTURE_COORD_ARRAY);
	glDisableClientState(GL_NORMAL_ARRAY);
	// disable tangents
	if (aNormal >= 0)
		glDisableVertexAttribArray(aNormal);
	if (aTangent >= 0)
		glDisableVertexAttribArray(aTangent);
//	if (aBinormal >= 0)
//		glDisableVertexAttribArray(aBinormal);

	BindDefaultMaterial(true);

#if 0
	// debug: draw a short line along the normal at every box vertex
	glBegin(GL_LINES);
	glColor3f(0.2, 0.2, 1);
	for (int i = 0; i < ARRAY_COUNT(box); i++)
	{
		glVertex3fv(box[i].v);
		CVec3 tmp;
		VectorMA(box[i], 20, normal[i], tmp);
		glVertex3fv(tmp.v);
	}
	glEnd();
	glColor3f(1, 1, 1);
#endif
}