void calc_and_accum(const Pred4 &pi, const Pred4 &pj, const v4sf idx){ const v4sf dx = (pj.xH - pi.xH) + (pj.xL - pi.xL); const v4sf dy = (pj.yH - pi.yH) + (pj.yL - pi.yL); const v4sf dz = (pj.zH - pi.zH) + (pj.zL - pi.zL); const v4sf dvx = pj.vx - pi.vx; const v4sf dvy = pj.vy - pi.vy; const v4sf dvz = pj.vz - pi.vz; const v4sf r2 = dx*dx + dy*dy + dz*dz; const v4sf rv = dx*dvx + dy*dvy + dz*dvz; const v4sf rinv = rsqrt_NR(r2); const v4sf rinv2 = rinv * rinv; const v4sf c1 = REP4(-3.0f); const v4sf alpha = c1 * rinv2 * rv; const v4sf mrinv3 = pj.mass * rinv * rinv2; // idx = [i1|i2|i3|i4]; r2 = [r1|r2|r3|r4] // r2_idx0 = [i1|r1|i2|r2]; r2_idx1 = [i3|r3|i4|r4] const v4sf r2_idx0 = __builtin_ia32_unpcklps(idx, r2); const v4sf r2_idx1 = __builtin_ia32_unpckhps(idx, r2); // Find minimum distance vnnb = (v4sf)__builtin_ia32_minpd((v2df)vnnb, (v2df)r2_idx0); vnnb = (v4sf)__builtin_ia32_minpd((v2df)vnnb, (v2df)r2_idx1); ax += mrinv3 * dx; ay += mrinv3 * dy; az += mrinv3 * dz; jx += mrinv3 * (dvx + alpha * dx); jy += mrinv3 * (dvy + alpha * dy); jz += mrinv3 * (dvz + alpha * dz); }
void clear(){ const v4sf zero = REP4(0.0f); ax = ay = az = zero; jx = jy = jz = zero; const double DHUGE = std::numeric_limits<double>::max(); v2df tmp = {DHUGE,DHUGE}; vnnb = (v4sf)tmp; }
Predictor(const Particle &p, const v2df ti) { const v4sf dt = __builtin_ia32_cvtpd2ps(ti - v2df(p.time)); const v4sf s0 = __builtin_ia32_shufps(dt, dt, 0x00); const v4sf s1 = s0 + s0; const v4sf s2 = s0 * (v4sf)REP4(1.5f); this->posH = p.posH; this->posL = v4sf(p.posL) + s0*(v4sf(p.vel) + s0*(v4sf(p.acc2) + s0*(v4sf(p.jrk6)))); this->vel = v4sf(p.vel) + s1*(v4sf(p.acc2) + s2*(v4sf(p.jrk6))); }
static void irr_simd_set_list( const int addr, const int nnb, const int nblist[]) { assert(nnb <= NBlist::NB_MAX); list[addr].nnb = nnb; const int *src = nblist; int *dst = list[addr].nb; // for(int k=0; k<nnb; k+=4){ // const int i0 = src[k+0] - 1; // const int i1 = src[k+1] - 1; // const int i2 = src[k+2] - 1; // const int i3 = src[k+3] - 1; // dst[k+0] = i0; // dst[k+1] = i1; // dst[k+2] = i2; // dst[k+3] = i3; // } for(int k=0; k<nnb; k+=4){ // assert((unsigned long)dst %16 == 0); typedef int v4si __attribute__((vector_size(16))); typedef long long v2di __attribute__ ((__vector_size__ (16))); const v4si one = REP4(1); const v4si idx0 = (v4si)__builtin_ia32_loaddqu((const char *)(src+k+0)); __builtin_ia32_movntdq((v2di *)(dst+k+0), (v2di)(idx0-one)); } // fill dummy const int nmax = ::nmax; const int kmax = 4 + 4 * (1 + (nnb-1)/4); for(int k=nnb; k<kmax; k++){ dst[k] = nmax; } }
/*
 * More thorough test of concatenation options.
 *
 * Walks one git_buf through free / putc / put / sets / puts / clear and
 * git_buf_copy_cstr, checking the resulting C string after each step.
 * The steps build on each other, so their order matters.
 */
void test_core_buffer__2(void)
{
	git_buf buf = GIT_BUF_INIT;
	int i;
	char data[128];

	/* a freshly initialised buffer holds nothing */
	cl_assert(buf.size == 0);

	/* freeing a never-used buffer must be safe to do */
	git_buf_free(&buf);
	cl_assert(buf.size == 0);
	cl_assert(buf.asize == 0);

	/* empty buffer should be empty string */
	cl_assert_equal_s("", git_buf_cstr(&buf));
	cl_assert(buf.size == 0);
	/* cl_assert(buf.asize == 0); -- should not assume what git_buf does */

	/* free should set us back to the beginning */
	git_buf_free(&buf);
	cl_assert(buf.size == 0);
	cl_assert(buf.asize == 0);

	/* add letter */
	git_buf_putc(&buf, '+');
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("+", git_buf_cstr(&buf));

	/* add letter again */
	git_buf_putc(&buf, '+');
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("++", git_buf_cstr(&buf));

	/* let's try that a few times: 2 + 16 = 18 characters in total */
	for (i = 0; i < 16; ++i) {
		git_buf_putc(&buf, '+');
		cl_assert(git_buf_oom(&buf) == 0);
	}
	cl_assert_equal_s("++++++++++++++++++", git_buf_cstr(&buf));

	git_buf_free(&buf);

	/* add data (a two-byte chunk with explicit length) */
	git_buf_put(&buf, "xo", 2);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("xo", git_buf_cstr(&buf));

	/* add data again */
	git_buf_put(&buf, "xo", 2);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s("xoxo", git_buf_cstr(&buf));

	/* let's try that a few times: 2 + 16 = 18 chunks in total */
	for (i = 0; i < 16; ++i) {
		git_buf_put(&buf, "xo", 2);
		cl_assert(git_buf_oom(&buf) == 0);
	}
	cl_assert_equal_s("xoxoxoxoxoxoxoxoxoxoxoxoxoxoxoxoxoxo",
					  git_buf_cstr(&buf));

	git_buf_free(&buf);

	/* set to string */
	git_buf_sets(&buf, test_string);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s(test_string, git_buf_cstr(&buf));

	/* append string */
	git_buf_puts(&buf, test_string);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s(test_string_x2, git_buf_cstr(&buf));

	/* set to string again (should overwrite - not append) */
	git_buf_sets(&buf, test_string);
	cl_assert(git_buf_oom(&buf) == 0);
	cl_assert_equal_s(test_string, git_buf_cstr(&buf));

	/* test clear */
	git_buf_clear(&buf);
	cl_assert_equal_s("", git_buf_cstr(&buf));

	git_buf_free(&buf);

	/* test extracting data into buffer */
	git_buf_puts(&buf, REP4("0123456789"));
	cl_assert(git_buf_oom(&buf) == 0);

	/* destination large enough: whole content is expected */
	git_buf_copy_cstr(data, sizeof(data), &buf);
	cl_assert_equal_s(REP4("0123456789"), data);
	/* smaller destination sizes: the test expects (size - 1) characters */
	git_buf_copy_cstr(data, 11, &buf);
	cl_assert_equal_s("0123456789", data);
	git_buf_copy_cstr(data, 3, &buf);
	cl_assert_equal_s("01", data);
	git_buf_copy_cstr(data, 1, &buf);
	cl_assert_equal_s("", data);

	/* the truncated copies above must not have changed the buffer itself */
	git_buf_copy_cstr(data, sizeof(data), &buf);
	cl_assert_equal_s(REP4("0123456789"), data);

	/* content longer than the destination */
	git_buf_sets(&buf, REP256("x"));
	git_buf_copy_cstr(data, sizeof(data), &buf);
	/* since sizeof(data) == 128, only 127 bytes should be copied */
	cl_assert_equal_s(REP4(REP16("x")) REP16("x") REP16("x")
					  REP16("x") "xxxxxxxxxxxxxxx", data);

	git_buf_free(&buf);

	/* copying out of a freed buffer is expected to give an empty string */
	git_buf_copy_cstr(data, sizeof(data), &buf);
	cl_assert_equal_s("", data);
}
Particle(int) // constructor for a dummy particle { posH = (v4sf){255.0f, 255.0f, 255.0f, 0.0f}; posL = vel = acc2 = jrk6 = (v4sf)REP4(0.0f); time = (v2df){0.0, 0.0}; }
// Reciprocal square root of each lane of x: the hardware estimate y from
// rsqrtps is refined once as (-0.5*y) * (x*y*y - 3), i.e. 0.5*y*(3 - x*y*y).
static inline v4sf rsqrt_NR(const v4sf x){
	const v4sf minus_half  = REP4(-0.5f);
	const v4sf minus_three = REP4(-3.0f);
	const v4sf est = __builtin_ia32_rsqrtps(x);
	return (minus_half * est) * (x*est*est + minus_three);
}
void CMaterialViewer::Draw3D(float TimeDelta) { if (IsTexture && ShowChannels) return; static const CVec3 origin = { -150, 100, 100 }; // static const CVec3 origin = { -150, 50, 50 }; CVec3 lightPosV; viewAxis.UnTransformVector(origin, lightPosV); #if 0 // show light source glDisable(GL_LIGHTING); BindDefaultMaterial(true); glBegin(GL_LINES); glColor3f(1, 0, 0); CVec3 tmp; tmp = lightPosV; tmp[0] -= 20; glVertex3fv(tmp.v); tmp[0] += 40; glVertex3fv(tmp.v); tmp = lightPosV; tmp[1] -= 20; glVertex3fv(tmp.v); tmp[1] += 40; glVertex3fv(tmp.v); tmp = lightPosV; tmp[2] -= 20; glVertex3fv(tmp.v); tmp[2] += 40; glVertex3fv(tmp.v); glEnd(); #endif glColor3f(1, 1, 1); if (!IsTexture) { glEnable(GL_LIGHTING); // no lighting for textures float lightPos[4]; lightPos[0] = lightPosV[0]; lightPos[1] = lightPosV[1]; lightPos[2] = lightPosV[2]; lightPos[3] = 0; glLightfv(GL_LIGHT0, GL_POSITION, lightPos); // glMaterialf(GL_FRONT, GL_SHININESS, 20); } // bind material UUnrealMaterial *Mat = static_cast<UUnrealMaterial*>(Object); Mat->SetMaterial(); // check tangent space GLint aNormal = -1; GLint aTangent = -1; // GLint aBinormal = -1; const CShader *Sh = GCurrentShader; if (Sh) { aNormal = Sh->GetAttrib("normal"); aTangent = Sh->GetAttrib("tangent"); // aBinormal = Sh->GetAttrib("binormal"); } // and draw box ... #define A 100 // vertex #define V000 {-A, -A, -A} #define V001 {-A, -A, A} #define V010 {-A, A, -A} #define V011 {-A, A, A} #define V100 { A, -A, -A} #define V101 { A, -A, A} #define V110 { A, A, -A} #define V111 { A, A, A} static const CVec3 box[] = { V001, V000, V010, V011, // near (x=-A) V111, V110, V100, V101, // far (x=+A) V101, V100, V000, V001, // left (y=-A) V011, V010, V110, V111, // right (y=+A) V010, V000, V100, V110, // bottom (z=-A) V001, V011, V111, V101, // top (z=+A) #undef A }; #define REP4(...) 
{__VA_ARGS__},{__VA_ARGS__},{__VA_ARGS__},{__VA_ARGS__} static const CVec4 normal[] = { REP4(-1, 0, 0, 1 ), REP4( 1, 0, 0, 1 ), REP4( 0,-1, 0, 1 ), REP4( 0, 1, 0, 1 ), REP4( 0, 0,-1, 1 ), REP4( 0, 0, 1, 1 ) }; static const CVec3 tangent[] = { REP4( 0,-1, 0 ), REP4( 0, 1, 0 ), REP4( 1, 0, 0 ), REP4(-1, 0, 0 ), REP4( 1, 0, 0 ), REP4(-1, 0, 0 ) }; // static const CVec3 binormal[] = // { // REP4( 0, 0, 1 ), // REP4( 0, 0, 1 ), // REP4( 0, 0, 1 ), // REP4( 0, 0, 1 ), // REP4( 0,-1, 0 ), // REP4( 0,-1, 0 ) // }; #undef REP4 static const float tex[][2] = { {0, 0}, {0, 1}, {1, 1}, {1, 0}, {0, 0}, {0, 1}, {1, 1}, {1, 0}, {0, 0}, {0, 1}, {1, 1}, {1, 0}, {0, 0}, {0, 1}, {1, 1}, {1, 0}, {0, 0}, {0, 1}, {1, 1}, {1, 0}, {0, 0}, {0, 1}, {1, 1}, {1, 0} }; static const int inds[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 12,13,14,15, 16,17,18,19, 20,21,22,23 }; #if 0 // verify tangents, should be suitable for binormal computation in shaders // (note: we're not verifying correspondence with UV coordinates) for (int i = 0; i < 24; i++) { CVec4 n4 = normal[i]; CVec3 n = n4.ToVec3(); CVec3 t = tangent[i]; CVec3 b = binormal[i]; CVec3 b2; cross(n, t, b2); VectorScale(b2, n4[3], b2); float dd = VectorDistance(b2, b); if (dd > 0.001f) appPrintf("dist[%d] = %g\n", i, dd); } #endif glEnableClientState(GL_VERTEX_ARRAY); glEnableClientState(GL_TEXTURE_COORD_ARRAY); glEnableClientState(GL_NORMAL_ARRAY); glVertexPointer(3, GL_FLOAT, sizeof(CVec3), box); glNormalPointer(GL_FLOAT, sizeof(CVec4), normal); glTexCoordPointer(2, GL_FLOAT, 0, tex); if (aNormal >= 0) { glEnableVertexAttribArray(aNormal); // send 4 components to decode binormal in shader glVertexAttribPointer(aNormal, 4, GL_FLOAT, GL_FALSE, sizeof(CVec4), normal); } if (aTangent >= 0) { glEnableVertexAttribArray(aTangent); glVertexAttribPointer(aTangent, 3, GL_FLOAT, GL_FALSE, sizeof(CVec3), tangent); } // if (aBinormal >= 0) // { // glEnableVertexAttribArray(aBinormal); // glVertexAttribPointer(aBinormal, 3, GL_FLOAT, GL_FALSE, 
sizeof(CVec3), binormal); // } glDrawElements(GL_QUADS, ARRAY_COUNT(inds), GL_UNSIGNED_INT, inds); glDisableClientState(GL_VERTEX_ARRAY); glDisableClientState(GL_TEXTURE_COORD_ARRAY); glDisableClientState(GL_NORMAL_ARRAY); // disable tangents if (aNormal >= 0) glDisableVertexAttribArray(aNormal); if (aTangent >= 0) glDisableVertexAttribArray(aTangent); // if (aBinormal >= 0) // glDisableVertexAttribArray(aBinormal); BindDefaultMaterial(true); #if 0 glBegin(GL_LINES); glColor3f(0.2, 0.2, 1); for (int i = 0; i < ARRAY_COUNT(box); i++) { glVertex3fv(box[i].v); CVec3 tmp; VectorMA(box[i], 20, normal[i], tmp); glVertex3fv(tmp.v); } glEnd(); glColor3f(1, 1, 1); #endif }