/// Assigns a 4-component vector to the shader variable wrapped by this object.
///
/// Nothing happens when the variable index is negative. Attributes are
/// reported through AssignmentToAttribute() and left untouched. A variable
/// whose GL type is neither GL_FLOAT_VEC4 nor GL_DOUBLE_VEC4 is reported
/// through TypeMismatch(); the parent program is still pushed and popped in
/// that case, but no uniform is written.
///
/// @param val  The four components to upload.
void IGLUShaderVariable::operator= ( float4 val )
{
	// A negative index means there is no variable to write to.
	if ( m_varIdx < 0 )
		return;

	// Attributes cannot be set through uniform assignment.
	if ( m_isAttribute )
	{
		AssignmentToAttribute( "vec4" );
		return;
	}

	// Report (but do not abort on) variables that are not a float/double vec4.
	if ( !( m_varType == GL_FLOAT_VEC4 || m_varType == GL_DOUBLE_VEC4 ) )
		TypeMismatch( "vec4" );

	// Setting shader values fails unless the owning program is currently bound.
	m_parent->PushProgram();

	if ( m_varType == GL_FLOAT_VEC4 )
	{
		glUniform4fv( m_varIdx, 1, val.GetConstDataPtr() );
	}
	else if ( m_varType == GL_DOUBLE_VEC4 )
	{
		glUniform4d( m_varIdx, val.X(), val.Y(), val.Z(), val.W() );
	}

	// The "program stack" is short, so always undo the push.
	m_parent->PopProgram();
}
void addRandRect(int num, float4 min, float4 max, float spacing, float scale, float4 dmin, float4 dmax, std::vector<float4>& rvec) { /*! * Create a rectangle with at most num particles in it. * The size of the return vector will be the actual number of particles used to fill the rectangle */ srand(time(NULL)); spacing *= 1.1f; min.print("Box min: "); max.print("Box max: "); float xmin = min.x / scale; float xmax = max.x / scale; float ymin = min.y / scale; float ymax = max.y / scale; float zmin = min.z / scale; float zmax = max.z / scale; rvec.resize(num); int i=0; for (float z = zmin; z <= zmax; z+=spacing) { for (float y = ymin; y <= ymax; y+=spacing) { for (float x = xmin; x <= xmax; x+=spacing) { if (i >= num) break; //printf("adding particles: %f, %f, %f\n", x, y, z); rvec[i] = float4(x-(float) rand()/RAND_MAX,y-(float) rand()/RAND_MAX,z-(float) rand()/RAND_MAX,1.0f); i++; }}} rvec.resize(i); }
/// Builds the quaternion that rotates sourceDirection onto targetDirection.
/// Both inputs are direction vectors; the scalar path asserts that their w
/// components are zero and forwards to the float3 overload.
/// NOTE(review): the SSE path uses the 4D dot product directly as the cosine of
/// the angle, which presumably requires both inputs to be unit length with
/// w == 0 - confirm against the float3 overload's contract.
/// NOTE(review): when the two directions are parallel the cross product is
/// zero and rsqrt_ps is evaluated at 0 - confirm callers never pass such input.
Quat MUST_USE_RESULT Quat::RotateFromTo(const float4 &sourceDirection, const float4 &targetDirection)
{
#if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE)
	// Best: 12.289 nsecs / 33.144 ticks, Avg: 12.489 nsecs, Worst: 14.210 nsecs
	simd4f cosAngle = dot4_ps(sourceDirection.v, targetDirection.v);
	// Flip the sign of three lanes so that one sqrt below yields both half-angle terms.
	cosAngle = negate3_ps(cosAngle); // [+ - - -]
	// XYZ channels use the trigonometric formula sin(x/2) = +/-sqrt(0.5-0.5*cosx))
	// The W channel uses the trigonometric formula cos(x/2) = +/-sqrt(0.5+0.5*cosx))
	simd4f half = set1_ps(0.5f);
	simd4f cosSinHalfAngle = sqrt_ps(add_ps(half, mul_ps(half, cosAngle))); // [cos(x/2), sin(x/2), sin(x/2), sin(x/2)]
	// Rotation axis: normalized cross product of the two directions.
	simd4f axis = cross_ps(sourceDirection.v, targetDirection.v);
	simd4f recipLen = rsqrt_ps(dot4_ps(axis, axis));
	axis = mul_ps(axis, recipLen); // [0 z y x]
	// Set the w component to one.
	simd4f one = add_ps(half, half); // [1 1 1 1]
	simd4f highPart = _mm_unpackhi_ps(axis, one); // [_ _ 1 z]
	axis = _mm_movelh_ps(axis, highPart); // [1 z y x]
	// Lane-wise product gives [cos(x/2), z*sin(x/2), y*sin(x/2), x*sin(x/2)].
	Quat q;
	q.q = mul_ps(axis, cosSinHalfAngle);
	return q;
#else
	// Best: 19.970 nsecs / 53.632 ticks, Avg: 20.197 nsecs, Worst: 21.122 nsecs
	assume(EqualAbs(sourceDirection.w, 0.f));
	assume(EqualAbs(targetDirection.w, 0.f));
	return Quat::RotateFromTo(sourceDirection.xyz(), targetDirection.xyz());
#endif
}
/// Orthonormalizes the pair (a, b) in place.
/// a keeps its direction and is normalized; b has its projection onto the
/// normalized a subtracted and is then normalized as well.
/// Both inputs are expected to be non-zero (checked with assume()).
void float4::Orthonormalize(float4 &a, float4 &b)
{
	assume(!a.IsZero());
	assume(!b.IsZero());

	// First basis vector: a itself, normalized.
	a.Normalize();

	// Second basis vector: what is left of b once its projection onto a is removed.
	const float4 bAlongA = b.ProjectToNorm(a);
	b -= bAlongA;
	b.Normalize();
}
/// Configures the sun orbit from a light direction, a start angle and an orbit time.
///
/// Two input conventions are handled, selected by newLightDir.w:
///  - w == FLT_MAX ("old"): newLightDir is the position where the sun reaches
///    its highest altitude; orbit axis, height and radius are derived from it.
///  - otherwise ("new"): newLightDir is the center of the orbit and w is the
///    orbit height, clamped to [-1, 1].
///
/// Afterwards the sun rotation is rebuilt around the orbit axis,
/// shadowDensityFactor is set from the sun's y coordinate at angle 0 (floored
/// at 0.01), and the light direction is set to the sun position at
/// sunStartAngle.
void DynamicSkyLight::SetLightParams(float4 newLightDir, float startAngle, float orbitTime)
{
	newLightDir.ANormalize();

	sunOrbitTime = orbitTime;
	sunStartAngle = PI + startAngle; //FIXME WHY +PI?

	initialSunAngle = GetRadFromXY(newLightDir.x, newLightDir.z);

	const bool orbitGivenExplicitly = !(newLightDir.w == FLT_MAX);

	if (orbitGivenExplicitly) {
		// new: newLightDir is center position of orbit, and newLightDir.w is orbit height
		sunOrbitHeight = std::max(-1.0f, std::min(newLightDir.w, 1.0f));
		sunOrbitRad = sqrt(1.0f - sunOrbitHeight * sunOrbitHeight);
	} else {
		// old: newLightDir is position where sun reaches highest altitude
		// Elevation angle of the given direction, from its y component and 2D length.
		const float horizontalLen = newLightDir.Length2D();
		const float elevation = (horizontalLen <= 0.001f) ? PI / 2.0f : atan(newLightDir.y / horizontalLen);
		const float peakHeight = tan(elevation - 0.001f);

		float3 peakDirection(cos(initialSunAngle), peakHeight, sin(initialSunAngle));
		peakDirection.ANormalize();

		if (peakDirection.y <= orbitMinSunHeight) {
			// Peak is not above the minimum height: orbit around the up axis.
			newLightDir = UpVector;
			sunOrbitHeight = peakDirection.y;
			sunOrbitRad = sqrt(1.0f - sunOrbitHeight * sunOrbitHeight);
		} else {
			// Opposite point of the orbit, placed at the minimum sun height.
			float3 lowDirection(cos(initialSunAngle + PI), orbitMinSunHeight, sin(initialSunAngle + PI));
			lowDirection.ANormalize();

			// The segment between both points is a diameter of the orbit circle.
			float3 diameter = lowDirection - peakDirection;
			sunOrbitRad = diameter.Length() / 2.0f;
			diameter.ANormalize();

			// The orbit axis is perpendicular to the diameter and is made to point upward.
			float3 sideways = (diameter.cross(UpVector)).ANormalize();
			float3 orbitAxis = (diameter.cross(sideways)).ANormalize();

			if (orbitAxis.y < 0.0f)
				orbitAxis = -orbitAxis;

			newLightDir = orbitAxis;
			sunOrbitHeight = orbitAxis.dot(peakDirection);
		}
	}

	sunRotation.LoadIdentity();
	sunRotation.SetUpVector(newLightDir);

	// Shadow density is the reciprocal of the sun's y coordinate at angle 0, floored at 0.01.
	const float4& peakDir = CalculateSunPos(0.0f);
	const float peakElev = std::max(0.01f, peakDir.y);
	shadowDensityFactor = 1.0f / peakElev;

	SetLightDir(CalculateSunPos(sunStartAngle).ANormalize());
}
/// Returns a normalized vector perpendicular to this one.
/// The result is this x hint, normalized. When that cross product has zero
/// length, hint2 is returned instead.
/// Preconditions (checked with assume()): this vector is non-zero with w == 0,
/// and both hints are normalized.
float4 float4::Perpendicular(const float4 &hint, const float4 &hint2) const
{
	assume(!this->IsZero3());
	assume(EqualAbs(w, 0));
	assume(hint.IsNormalized());
	assume(hint2.IsNormalized());

	float4 candidate = this->Cross(hint);
	const float crossLength = candidate.Normalize();

	// A zero-length cross product gives no usable direction, so use the second hint.
	if (crossLength == 0)
		return hint2;

	return candidate;
}
/// Recomputes the sun orbit parameters and refreshes the sun.
///
/// @param newSunDir   Interpreted according to iscompat (see below); normalized on entry.
/// @param startAngle  Stored as sunStartAngle.
/// @param orbitTime   Stored as sunOrbitTime.
/// @param iscompat    true:  newSunDir is the position where the sun reaches its
///                           highest altitude (backwards compatible mode);
///                    false: newSunDir is the center of the orbit and its w
///                           component is the orbit height, clamped to [-1, 1].
void CGlobalRendering::UpdateSunParams(float4 newSunDir, float startAngle, float orbitTime, bool iscompat)
{
	newSunDir.ANormalize();

	sunOrbitTime = orbitTime;
	sunStartAngle = startAngle;

	initialSunAngle = fastmath::coords2angle(newSunDir.x, newSunDir.z);

	if (!iscompat) {
		// new: sunDir is center position of orbit, and sunDir.w is orbit height
		sunOrbitHeight = std::max(-1.0f, std::min(newSunDir.w, 1.0f));
		sunOrbitRad = sqrt(1.0f - sunOrbitHeight * sunOrbitHeight);
	} else {
		// backwards compatible: sunDir is position where sun reaches highest altitude
		const float3 worldUp(0.0f, 1.0f, 0.0f);

		// Elevation angle of the given direction, from its y component and 2D length.
		const float horizontalLen = newSunDir.Length2D();
		const float elevation = (horizontalLen <= 0.001f) ? PI / 2.0f : atan(newSunDir.y / horizontalLen);
		const float peakHeight = tan(elevation - 0.001f);

		// the lowest sun altitude for an auto generated orbit
		const float orbitMinSunHeight = 0.1f;

		float3 peakDirection(cos(initialSunAngle), peakHeight, sin(initialSunAngle));
		peakDirection.ANormalize();

		if (peakDirection.y <= orbitMinSunHeight) {
			// Peak is not above the minimum height: orbit around the up axis.
			newSunDir = worldUp;
			sunOrbitHeight = peakDirection.y;
			sunOrbitRad = sqrt(1.0f - sunOrbitHeight * sunOrbitHeight);
		} else {
			// Opposite point of the orbit, placed at the minimum sun height.
			float3 lowDirection(cos(initialSunAngle + PI), orbitMinSunHeight, sin(initialSunAngle + PI));
			lowDirection.ANormalize();

			// The segment between both points is a diameter of the orbit circle.
			float3 diameter = lowDirection - peakDirection;
			sunOrbitRad = diameter.Length() / 2.0f;
			diameter.ANormalize();

			// The orbit axis is perpendicular to the diameter and is made to point upward.
			float3 sideways = diameter.cross(worldUp);
			sideways.ANormalize();
			float3 orbitAxis = diameter.cross(sideways);
			orbitAxis.ANormalize();

			if (orbitAxis.y < 0)
				orbitAxis = -orbitAxis;

			newSunDir = orbitAxis;
			sunOrbitHeight = orbitAxis.dot(peakDirection);
		}
	}

	sunRotation.LoadIdentity();
	sunRotation.SetUpVector(newSunDir);

	// Shadow density is the reciprocal of the sun's y coordinate at angle 0, floored at 0.01.
	float4 peakSunDir = CalculateSunDir(0.0f);
	const float peakElevation = std::max(0.01f, peakSunDir.y);
	shadowDensityFactor = 1.0f / peakElevation;

	UpdateSun(true);
}
/**
 * Builds a rotation of `angle` radians about `axis`.
 *
 * The axis' w component is discarded and the remainder is normalized.
 * The matrix follows the Redbook formula
 *
 *     u = axis/||axis||
 *
 *         |  0 -z  y |
 *     S = |  z  0 -x |
 *         | -y  x  0 |
 *
 *     M = uu^t + cos(a)(I - uu^t) + sin(a)*S
 *
 * Each of result.x/.y/.z receives
 *     u_i*u + cos(a)*(e_i - u_i*u) + sin(a)*s_i
 * where s_i = (0, z, -y), (-z, 0, x), (y, -x, 0) respectively, i.e. the
 * i-th column of S as written above.
 * NOTE(review): this presumably means result.x/.y/.z are the matrix columns -
 * confirm against matrix4's storage convention.
 * result.w is (0, 0, 0, 1).
 */
matrix4 matrix4::rotation(float4 axis, float angle)
{
    const float s = std::sin(angle);
    const float c = std::cos(angle);

    axis.w = 0.0f;
    float4 u(axis.normalized());

    // u_i * u terms, shared by the first two summands of each entry.
    float4 ux = u.x*u;
    float4 uy = u.y*u;
    float4 uz = u.z*u;

    matrix4 result;
    result.x = ux + c*(float4(1, 0, 0, 0) - ux) + s * float4(0.0, u.z, -u.y, 0);
    result.y = uy + c*(float4(0, 1, 0, 0) - uy) + s * float4(-u.z, 0.0, u.x, 0);
    result.z = uz + c*(float4(0, 0, 1, 0) - uz) + s * float4(u.y, -u.x, 0.0, 0);
    result.w = float4(0, 0, 0, 1);
    return result;
}
// set uniform to 4D vector void Shader::SetUniform(const c8 * const name, const float4 &val) { PUSH_ACTIVE_SHADER(t); Activate(); glUniform4fv(GetUniformLocation(name),1, val.GetVec()); POP_ACTIVE_SHADER(t); };
/// Creates a hose and stores its configuration.
/// The emitted counter starts at zero and the remaining counter at total_n.
/// calc_vectors() runs once all parameters are stored, then the center and
/// velocity are printed.
Hose::Hose(RTPS *ps, int total_n, float4 center, float4 velocity, float radius, float spacing, float4 color)
{
    printf("Constructor!\n");

    // Owning system and particle budget.
    this->ps = ps;
    this->total_n = total_n;
    n_count = total_n;
    em_count = 0;

    // Geometry and appearance.
    this->center = center;
    this->radius = radius;
    this->spacing = spacing;
    this->velocity = velocity;
    this->color = color;

    // Runs only after every parameter above has been stored.
    calc_vectors();

    center.print("center");
    velocity.print("velocity");
}
// Multiply matrix and 4D vector together float4 Mat44::Mult(const float4 &m) const { Mat44 tr = Transpose(); __m128 matcols[] = { _mm_load_ps(tr.mat), _mm_load_ps(tr.mat+4), _mm_load_ps(tr.mat+8), _mm_load_ps(tr.mat+12) }; __m128 v = _mm_load_ps(m.GetVec()); // Broadcast vector into SSE registers __m128 xb = _mm_shuffle_ps(v,v,0x00); __m128 yb = _mm_shuffle_ps(v,v,0x55); __m128 zb = _mm_shuffle_ps(v,v,0xAA); __m128 wb = _mm_shuffle_ps(v,v,0xFF); // Perform multiplication by matrix columns xb = _mm_mul_ps(xb, matcols[0]); yb = _mm_mul_ps(yb, matcols[1]); zb = _mm_mul_ps(zb, matcols[2]); wb = _mm_mul_ps(wb, matcols[3]); // Add results __m128 r = _mm_add_ps(_mm_add_ps(xb, yb),_mm_add_ps(zb, wb)); float4 returnVec; _mm_store_ps(returnVec.GetVec(), r); return returnVec; };
/// Tests a ray against a plane given as (normal.xyz, w).
///
/// @param ray    Ray with `origin` and `direction`.
/// @param plane  Plane coefficients; a point p is evaluated as dot(plane, (p, 1)).
/// @param hit_t  Optional output; when non-null and the test succeeds, receives
///               the ray parameter of the intersection (may be negative).
/// @return false when the ray direction has zero dot product with the plane
///         normal, true otherwise.
bool intersect_ray_plane(const ray & ray, const float4 & plane, float * hit_t)
{
    const float denom = dot(plane.xyz(), ray.direction);
    const bool parallel = (std::abs(denom) == 0);

    if(!parallel && hit_t)
    {
        // Value of the plane expression at the origin, divided by its rate of change along the ray.
        *hit_t = -dot(plane, float4(ray.origin,1)) / denom;
    }
    return !parallel;
}
/// Returns the 3D cross product of this vector with rhs.
/// With MATH_SSE defined the product is computed with _mm_cross_ps on the
/// SIMD members; otherwise the call is forwarded to the overload taking
/// rhs.xyz().
float4 float4::Cross3(const float4 &rhs) const
{
#ifndef MATH_SSE
	return Cross3(rhs.xyz());
#else
	return float4(_mm_cross_ps(v, rhs.v));
#endif
}
bool ray4::hitsTriangle(const float4 *points, float &length) const { const float4 p0p1(points[1] - points[0]); const float4 p0p2(points[2] - points[0]); const float4 normal = float4(p0p1.cross(p0p2)); // n * p = n * (s + t*d) = n*s + n*d*t // n(p-s) = ndt // (n(p-s))/(n*d) = t float4 dir = direction(); float4 outFactor = float4(-1.0f); float4 normalTimesDirection = normal.prod(dir); outFactor = (normalTimesDirection != float4(0.0f)).select(normal.prod(points[0] - start()) / normalTimesDirection, outFactor); if((outFactor < float4(0.0f)).all()) return false; if(outFactor.max() > 1.0f) return false; const float4 location = this->point(outFactor.max()); length = outFactor.max(); return location.isOnTriangle(points); }
void CSelectedUnits::HandleUnitBoxSelection(const float4& planeRight, const float4& planeLeft, const float4& planeTop, const float4& planeBottom) { GML_RECMUTEX_LOCK(sel); // SelectUnits CUnit* unit = NULL; int addedunits = 0; int team, lastTeam; if (gu->spectatingFullSelect || gs->godMode) { // any team's units can be *selected* // (whether they can be given orders // depends on our ability to play god) team = 0; lastTeam = teamHandler->ActiveTeams() - 1; } else { team = gu->myTeam; lastTeam = gu->myTeam; } for (; team <= lastTeam; team++) { CUnitSet& teamUnits = teamHandler->Team(team)->units; for (CUnitSet::iterator ui = teamUnits.begin(); ui != teamUnits.end(); ++ui) { const float4 vec((*ui)->midPos, 1.0f); if (vec.dot4(planeRight) < 0.0f && vec.dot4(planeLeft) < 0.0f && vec.dot4(planeTop) < 0.0f && vec.dot4(planeBottom) < 0.0f) { if (keyInput->IsKeyPressed(SDLK_LCTRL) && (selectedUnits.find(*ui) != selectedUnits.end())) { RemoveUnit(*ui); } else { AddUnit(*ui); unit = *ui; addedunits++; } } } } #if (PLAY_SOUNDS == 1) if (addedunits >= 2) { Channels::UserInterface.PlaySample(soundMultiselID); } else if (addedunits == 1) { Channels::UnitReply.PlayRandomSample(unit->unitDef->sounds.select, unit); } #endif }
/// Rotates vec by this quaternion.
/// vec.w is expected to be 0 or 1 (checked with assume()). The scalar path
/// rotates the xyz part and passes w through unchanged; the SSE path
/// delegates to quat_transform_vec4().
float4 MUST_USE_RESULT Quat::Transform(const float4 &vec) const
{
	assume(vec.IsWZeroOrOne());

#if !(defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE))
	float4 rotated(Transform(vec.x, vec.y, vec.z), vec.w);
	return rotated;
#else
	return quat_transform_vec4(q, vec);
#endif
}
/// Returns the angle, in radians, between this vector and other, using all
/// four components.
/// The cosine is Dot4(other) divided by the product of both lengths. Values at
/// or beyond +/-1 are mapped straight to 0 and pi so that rounding error
/// cannot push acos() outside its domain.
float float4::AngleBetween4(const float4 &other) const
{
	const float cosa = Dot4(other) / sqrt(LengthSq4() * other.LengthSq4());

	if (cosa <= -1.f)
		return pi;
	if (cosa >= 1.f)
		return 0.f;
	return acos(cosa);
}
// 4D Multi-octave Simplex noise. // // For each octave, a higher frequency/lower amplitude function will be added to the original. // The higher the persistence [0-1], the more of each succeeding octave will be added. float simplexNoise( const int octaves, const float persistence, const float scale, const float4 &v ) { float total = 0; float frequency = scale; float amplitude = 1; // We have to keep track of the largest possible amplitude, // because each octave adds more, and we need a value in [-1, 1]. float maxAmplitude = 0; for( int i=0; i < octaves; i++ ) { total += simplexRawNoise( v.x() * frequency, v.y() * frequency, v.z() * frequency, v.w() * frequency ) * amplitude; frequency *= 2; maxAmplitude += amplitude; amplitude *= persistence; } return total / maxAmplitude; }
/// Returns the 3D cross product of this vector with rhs.
/// The SSE path asserts that both operands are 16-byte aligned before using
/// cross_ps(); the scalar path forwards to the overload taking rhs.xyz().
float4 float4::Cross3(const float4 &rhs) const
{
#if !(defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE))
	return Cross3(rhs.xyz());
#else
	// For SSE ops we must be 16-byte aligned.
	assert((((uintptr_t)&rhs) & 15) == 0);
	assert((((uintptr_t)this) & 15) == 0);
	return float4(cross_ps(v, rhs.v));
#endif
}
/// Multiplies the vector v with the matrix m and divides the result by its
/// last component.
///
/// Each component of v is broadcast across a register and multiplied with the
/// matching entry of m.m_rows; the four products are summed. The shuffle
/// immediates select the broadcast lane:
///   0b00000000 = 0x00, 0b01010101 = 0x55, 0b10101010 = 0xAA, 0b11111111 = 0xFF
///
/// The division uses the approximate reciprocal (_mm_rcp_ps) refined by one
/// Newton step, r' = r * (2 - r*w), which is faster than a direct division
/// (the exact form would be hvec * _mm_div_ps(_mm_set1_ps(1), w)).
friend float4 operator*(float4 v, const M44& m)
{
    float4 xTerm = m.m_rows[0]*float4(_mm_shuffle_ps(v.get(), v.get(), 0x00));
    float4 yTerm = m.m_rows[1]*float4(_mm_shuffle_ps(v.get(), v.get(), 0x55));
    float4 zTerm = m.m_rows[2]*float4(_mm_shuffle_ps(v.get(), v.get(), 0xAA));
    float4 wTerm = m.m_rows[3]*float4(_mm_shuffle_ps(v.get(), v.get(), 0xFF));

    float4 hvec = xTerm + yTerm + zTerm + wTerm;

    // Broadcast the last component, then refine its approximate reciprocal once.
    float4 w = _mm_shuffle_ps(hvec.get(), hvec.get(), 0xFF);
    float4 rcp = _mm_rcp_ps(w.get());
    rcp *= (2 - rcp*w);

    return hvec * rcp;
}
/// Converts this quaternion plus a translation into a 4x4 matrix.
/// The quaternion is expected to be normalized (checked with assume()).
/// The SSE path fills the matrix rows through quat_to_mat4x4(); the scalar
/// path uses the float4x4 constructor with translation.xyz().
float4x4 MUST_USE_RESULT Quat::ToFloat4x4(const float4 &translation) const
{
	assume(IsNormalized());

#if !(defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE))
	return float4x4(*this, translation.xyz());
#else
	float4x4 result;
	quat_to_mat4x4(q, translation.v, result.row);
	return result;
#endif
}
void Quat::SetFromAxisAngle(const float4 &axis, float angle) { assume1(EqualAbs(axis.w, 0.f), axis); assume2(axis.IsNormalized(1e-4f), axis, axis.Length4()); assume1(MATH_NS::IsFinite(angle), angle); #if defined(MATH_AUTOMATIC_SSE) && defined(MATH_SSE2) // Best: 26.499 nsecs / 71.024 ticks, Avg: 26.856 nsecs, Worst: 27.651 nsecs simd4f halfAngle = set1_ps(0.5f*angle); simd4f sinAngle, cosAngle; sincos_ps(halfAngle, &sinAngle, &cosAngle); simd4f quat = mul_ps(axis, sinAngle); // Set the w component to cosAngle. simd4f highPart = _mm_unpackhi_ps(quat, cosAngle); // [_ _ 1 z] q = _mm_movelh_ps(quat, highPart); // [1 z y x] #else // Best: 36.868 nsecs / 98.312 ticks, Avg: 36.980 nsecs, Worst: 41.477 nsecs SetFromAxisAngle(axis.xyz(), angle); #endif }
/// Adjusts the owner's vertical velocity so that it approaches the wanted
/// relative height, keeping the horizontal (XZ) part of spd.
///
/// @param spd           Current speed vector; only read through this reference.
/// @param curRelHeight  Current height relative to the ground.
/// @param curVertSpeed  Vertical speed written into the owner's velocity before
///                      the wanted speed is evaluated.
///
/// NOTE(review): spd is taken by reference and is read again after each
/// owner->SetVelocity() call. If callers pass the owner's own speed vector,
/// those reads observe the freshly written velocity - confirm at the call site
/// before reordering or caching anything in here.
void CHoverAirMoveType::UpdateVerticalSpeed(const float4& spd, float curRelHeight, float curVertSpeed) const
{
	float wh = wantedHeight; // wanted RELATIVE height (altitude)
	float ws = 0.0f;         // wanted vertical speed

	// Start from the current horizontal speed combined with the supplied vertical speed.
	owner->SetVelocity((spd * XZVector) + (UpVector * curVertSpeed));

	if (lastColWarningType == 2) {
		// Offset and relative speed between the collision-warning object and the owner.
		const float3 dir = lastColWarning->midPos - owner->midPos;
		const float3 sdir = lastColWarning->speed - spd;

		if (spd.dot(dir + sdir * 20.0f) < 0.0f) {
			// Lower the target height if the other object is above us, raise it otherwise.
			if (lastColWarning->midPos.y > owner->pos.y) {
				wh -= 30.0f;
			} else {
				wh += 50.0f;
			}
		}
	}

	if (curRelHeight < wh) {
		// Below the wanted height: climb, unless the current upward speed already suffices.
		ws = altitudeRate;

		if ((spd.y > 0.0001f) && (((wh - curRelHeight) / spd.y) * accRate * 1.5f) < spd.y) {
			ws = 0.0f;
		}
	} else {
		// At or above the wanted height: descend, unless the current downward speed already suffices.
		ws = -altitudeRate;

		if ((spd.y < -0.0001f) && (((wh - curRelHeight) / spd.y) * accRate * 0.7f) < -spd.y) {
			ws = 0.0f;
		}
	}

	// Both targets are scaled by (1 - beingBuilt).
	ws *= (1 - owner->beingBuilt);
	// note: don't want this in case unit is built on some raised platform?
	wh *= (1 - owner->beingBuilt);

	if (math::fabs(wh - curRelHeight) > 2.0f) {
		if (spd.y > ws) {
			// Reduce the vertical speed by at most accRate * 1.5, but not below ws.
			owner->SetVelocity((spd * XZVector) + (UpVector * std::max(ws, spd.y - accRate * 1.5f)));
		} else {
			// accelerate upward faster if close to ground
			owner->SetVelocity((spd * XZVector) + (UpVector * std::min(ws, spd.y + accRate * ((curRelHeight < 20.0f)? 2.0f: 0.7f))));
		}
	} else {
		// Within 2 units of the wanted height: damp the vertical speed.
		owner->SetVelocity((spd * XZVector) + (UpVector * spd.y * 0.95f));
	}

	// finally update w-component
	owner->SetSpeed(spd);
}
void CSelectedUnitsHandler::HandleUnitBoxSelection(const float4& planeRight, const float4& planeLeft, const float4& planeTop, const float4& planeBottom) { CUnit* unit = NULL; int addedunits = 0; int team, lastTeam; if (gu->spectatingFullSelect || gs->godMode) { // any team's units can be *selected* // (whether they can be given orders // depends on our ability to play god) team = 0; lastTeam = teamHandler->ActiveTeams() - 1; } else { team = gu->myTeam; lastTeam = gu->myTeam; } for (; team <= lastTeam; team++) { for (CUnit* u: teamHandler->Team(team)->units) { const float4 vec(u->midPos, 1.0f); if (vec.dot4(planeRight) < 0.0f && vec.dot4(planeLeft) < 0.0f && vec.dot4(planeTop) < 0.0f && vec.dot4(planeBottom) < 0.0f) { if (KeyInput::GetKeyModState(KMOD_CTRL) && (selectedUnits.find(u) != selectedUnits.end())) { RemoveUnit(u); } else { AddUnit(u); unit = u; addedunits++; } } } } if (addedunits >= 2) { Channels::UserInterface->PlaySample(soundMultiselID); } else if (addedunits == 1) { Channels::UnitReply->PlayRandomSample(unit->unitDef->sounds.select, unit); } }
/// Tests whether a, b and c form an orthonormal set.
/// All three pairs must be perpendicular within epsilon, and each vector must
/// be normalized within epsilon squared.
bool MUST_USE_RESULT float4::AreOrthonormal(const float4 &a, const float4 &b, const float4 &c, float epsilon)
{
	const bool pairwisePerpendicular =
		a.IsPerpendicular(b, epsilon) &&
		a.IsPerpendicular(c, epsilon) &&
		b.IsPerpendicular(c, epsilon);

	if (!pairwisePerpendicular)
		return false;

	// The length checks use the squared tolerance.
	const float lengthTolerance = epsilon*epsilon;
	return a.IsNormalized(lengthTolerance)
		&& b.IsNormalized(lengthTolerance)
		&& c.IsNormalized(lengthTolerance);
}
void Transform( const Ptr<ShaderResourceView> & src, const Ptr<RenderTargetView> & dst, const Vector<ColorWriteMask, 4> & colorWriteMask, const float4 & srcRect, const float4 & dstRect, const Ptr<class Sampler> & sampler, const Ptr<DepthStencilView> & dsv ) { auto rc = Global::GetRenderEngine()->GetRenderContext(); std::map<String, String> macros; for (int32_t i = 0; i < 4; ++i) { auto & writeMask = colorWriteMask[i]; String writeChannel = std::to_string(static_cast<uint32_t>(std::log2(static_cast<uint32_t>(writeMask)))); macros["COLOR_CHANNEL_" + std::to_string(i)] = writeChannel; } auto transformPS = Shader::FindOrCreate<TransformPS>(macros); transformPS->SetSampler("transformSampler", sampler ? sampler : SamplerTemplate<>::Get()); transformPS->SetSRV("srcTex", src); transformPS->Flush(); auto srcTex = src->GetResource()->Cast<Texture>(); float topLeftU = srcRect.x() / static_cast<float>(srcTex->GetDesc().width); float topLeftV = srcRect.y() / static_cast<float>(srcTex->GetDesc().height); float uvWidth = srcRect.z() / static_cast<float>(srcTex->GetDesc().width); float uvHeight = srcRect.w() / static_cast<float>(srcTex->GetDesc().height); if (uvWidth == 0.0f) uvWidth = 1.0f; if (uvHeight == 0.0f) uvHeight = 1.0f; DrawQuad({ dst }, dstRect.x(), dstRect.y(), dstRect.z(), dstRect.w(), topLeftU, topLeftV, uvWidth, uvHeight, dsv); }
float PlaneCost(const std::vector<Face> &inputfaces,const float4 &split,const WingMesh &space,int onbrep) { count[COPLANAR] = 0; count[UNDER] = 0; count[OVER] = 0; count[SPLIT] = 0; for(unsigned int i=0;i<inputfaces.size();i++) { count[FaceSplitTest(inputfaces[i],split,FUZZYWIDTH)]++; } if (space.verts.size() == 0) { // The following formula isn't that great. // Better to use volume as well eh. return (float)(abs(count[OVER]-count[UNDER]) + count[SPLIT] - count[COPLANAR]); } float volumeover =(float)1.0; float volumeunder=(float)1.0; float volumetotal=WingMeshVolume(space); WingMesh spaceunder= WingMeshCrop(space,float4( split.xyz(), split.w)); WingMesh spaceover = WingMeshCrop(space,float4(-split.xyz(),-split.w)); if(usevolcalc==1) { volumeunder = WingMeshVolume(spaceunder); volumeover = WingMeshVolume(spaceover ); } else if (usevolcalc==2) { volumeunder = sumbboxdim(spaceunder); volumeover = sumbboxdim(spaceover ); } assert(volumeover/volumetotal>=-0.01); assert(volumeunder/volumetotal>=-0.01); if(fabs((volumeover+volumeunder-volumetotal)/volumetotal)>0.01) { // ok our volume equations are starting to break down here // lets hope that we dont have too many polys to deal with at this point. volumetotal=volumeover+volumeunder; } if(solidbias && onbrep && count[OVER]==0 && count[SPLIT]==0) { return volumeunder; } return volumeover *powf(count[OVER] +1.5f*count[SPLIT],0.9f) + volumeunder*powf(count[UNDER]+1.5f*count[SPLIT],0.9f); }
/// Rotates vec by this quaternion.
/// vec.w is expected to be 0 or 1 (checked with assume()); the xyz part is
/// rotated through the three-scalar overload and w is passed through unchanged.
float4 Quat::Transform(const float4 &vec) const
{
	assume(vec.IsWZeroOrOne());

	float4 rotated(Transform(vec.x, vec.y, vec.z), vec.w);
	return rotated;
}
// Splits the BSP subtree n by the plane p.
//
// n          - subtree to split; ownership is taken. A null n yields two null results.
// p          - splitting plane as (normal.xyz, w).
// nodeunder  - receives the part classified UNDER the plane (reset to null first).
// nodeover   - receives the part classified OVER the plane (reset to null first).
//
// If n's convex cell lies entirely on one side, n is handed over unchanged to
// that output. Otherwise two new nodes carrying n's plane are created, their
// cells are n's cell cropped by p and by the flipped plane, n's children are
// partitioned recursively, and the results are simplified: a node with a
// single child is replaced by that child, and a node whose two children are
// leaves with the same isleaf value is merged into one leaf.
void BSPPartition(std::unique_ptr<BSPNode> n, const float4 &p, std::unique_ptr<BSPNode> & nodeunder, std::unique_ptr<BSPNode> & nodeover)
{
	nodeunder=NULL;
	nodeover =NULL;
	if(!n) {
		return;
	}
	// assert(n->cell);
	int flag;
	//flag = SplitTest(n->cell,p);
	//assert(flag==SplitTest(*n->convex,p));
	flag = n->convex.SplitTest(p);
	// Entirely on one side: hand the whole subtree to that output.
	if(flag == UNDER) {
		nodeunder = move(n);
		return;
	}
	if(flag==OVER) {
		nodeover = move(n);
		return;
	}
	assert(flag==SPLIT);
	// Polyhedron *cellover = PolyhedronDup(n->cell);
	// Polyhedron *cellunder = PolyhedronDup(n->cell);
	// cellunder->Crop(p);
	// cellover->Crop(Plane(-p.normal,-p.dist));
	// Both halves start as copies of n's plane and leaf state.
	nodeunder.reset(new BSPNode(n->xyz(), n->w));
	nodeover.reset(new BSPNode(n->xyz(),n->w));
	nodeunder->isleaf = n->isleaf;
	nodeover->isleaf = n->isleaf;
	// nodeunder->cell= cellunder;
	// nodeover->cell = cellover;
	// Each half gets n's cell cropped to its side of p.
	nodeunder->convex = WingMeshCrop(n->convex,p);
	nodeover->convex = WingMeshCrop(n->convex, float4(-p.xyz(), -p.w));
	if(n->isleaf==UNDER) {
		// Distribute n's boundary faces over the two halves by embedding them
		// into a temporary node that splits on p and owns both halves.
		int i;
		BSPNode fake(p.xyz(), p.w);
		fake.under=move(nodeunder);
		fake.over=move(nodeover);
		i=n->brep.size();
		while(i--){
			FaceEmbed(&fake, std::move(n->brep[i]));
		}
		n->brep.clear();
		// Take the halves back before the temporary node goes out of scope.
		nodeunder = move(fake.under);
		nodeover = move(fake.over);
	}
	// Recurse: each child of n is split into the matching child slots of both halves.
	BSPPartition(move(n->under), p, nodeunder->under, nodeover->under);
	BSPPartition(move(n->over), p, nodeunder->over, nodeover->over );
	if(n->isleaf) {
		assert(nodeunder->isleaf);
		assert(nodeover->isleaf);
		return;
	}
	assert(nodeunder->over || nodeunder->under);
	assert(nodeover->over || nodeover->under);
	n.reset();
	// A half with only one child is replaced by that child.
	if(!nodeunder->under) {
		// assert(SplitTest(nodeunder->cell,*nodeunder)==OVER);
		nodeunder = move(nodeunder->over);
	}
	else if(!nodeunder->over) {
		// assert(SplitTest(nodeunder->cell,*nodeunder)==UNDER);
		nodeunder = move(nodeunder->under);
	}
	assert(nodeunder);
	assert(nodeunder->isleaf || (nodeunder->under && nodeunder->over));
	if(!nodeover->under) {
		// assert(SplitTest(nodeover->cell,*nodeover)==OVER);
		nodeover = move(nodeover->over);
	}
	else if(!nodeover->over) {
		// assert(SplitTest(nodeover->cell,*nodeover)==UNDER);
		nodeover = move(nodeover->under);
	}
	assert(nodeover);
	assert(nodeover->isleaf || (nodeover->under && nodeover->over));
	// Two leaf children with the same isleaf value: merge them into this node,
	// collecting their boundary faces.
	if(!nodeunder->isleaf && nodeunder->over->isleaf && nodeunder->over->isleaf==nodeunder->under->isleaf) {
		nodeunder->isleaf = nodeunder->over->isleaf; // pick one of the children
		int i;
		i=nodeunder->under->brep.size();
		while(i--){
			nodeunder->brep.push_back(nodeunder->under->brep[i]);
		}
		nodeunder->under->brep.clear();
		i=nodeunder->over->brep.size();
		while(i--){
			nodeunder->brep.push_back(nodeunder->over->brep[i]);
		}
		nodeunder->over->brep.clear();
		nodeunder->under.reset();
		nodeunder->over.reset();
	}
	// wtf: if(!nodeover->isleaf && nodeover->over->isleaf==nodeover->under->isleaf) {
	// Same merge for the "over" half.
	if(!nodeover->isleaf && nodeover->over->isleaf && nodeover->over->isleaf==nodeover->under->isleaf) {
		nodeover->isleaf = nodeover->over->isleaf; // pick one of the children
		int i;
		i=nodeover->under->brep.size();
		while(i--){
			nodeover->brep.push_back(nodeover->under->brep[i]);
		}
		nodeover->under->brep.clear();
		i=nodeover->over->brep.size();
		while(i--){
			nodeover->brep.push_back(nodeover->over->brep[i]);
		}
		nodeover->over->brep.clear();
		nodeover->under.reset();
		nodeover->over.reset();
	}
	/*
	if(fusenodes) {
		if(0==nodeunder->isleaf) {
			if(nodeunder->over->isleaf==UNDER) {
				DeriveCells(nodeunder->under,nodeunder->cell);
				nodeunder = nodeunder->under; // memleak
			}
			else if(nodeunder->under->isleaf==OVER) {
				DeriveCells(nodeunder->over,nodeunder->cell);
				nodeunder = nodeunder->over; // memleak
			}
		}
		assert(nodeunder);
		if(0==nodeover->isleaf) {
			if(nodeover->over->isleaf==UNDER) {
				DeriveCells(nodeover->under,nodeover->cell);
				nodeover = nodeover->under; // memleak
			}
			else if(nodeover->under->isleaf==OVER) {
				DeriveCells(nodeover->over,nodeover->cell);
				nodeover = nodeover->over; // memleak
			}
		}
		assert(nodeover);
	}
	*/
}
/// SSE slab test of the line rayPos + t*rayDir, t in [tNear, tFar], against this AABB.
///
/// @param rayPos  Line origin.
/// @param rayDir  Line direction; expected to be normalized (checked with assume()).
/// @param tNear   Start of the parameter range to test.
/// @param tFar    End of the parameter range to test; must not be below tNear.
/// @return true if the line intersects the box within the given range.
///
/// tNear and tFar are taken by value, so the clipped interval is not reported
/// back to the caller.
bool AABB::IntersectLineAABB_SSE(const float4 &rayPos, const float4 &rayDir, float tNear, float tFar) const
{
	assume(rayDir.IsNormalized4());
	assume(tNear <= tFar && "AABB::IntersectLineAABB: User gave a degenerate line as input for the intersection test!");
	/* For reference, this is the C++ form of the vectorized SSE code below.

	float4 recipDir = rayDir.RecipFast4();
	float4 t1 = (aabbMinPoint - rayPos).Mul(recipDir);
	float4 t2 = (aabbMaxPoint - rayPos).Mul(recipDir);
	float4 near = t1.Min(t2);
	float4 far = t1.Max(t2);
	float4 rayDirAbs = rayDir.Abs();

	if (rayDirAbs.x > 1e-4f) // ray is not parallel to the plane in question
	{
		tNear = Max(near.x, tNear); // tNear tracks distance to intersect (enter) the AABB.
		tFar = Min(far.x, tFar); // tFar tracks the distance to exit the AABB.
	}
	else if (rayPos.x < aabbMinPoint.x || rayPos.x > aabbMaxPoint.x) // early-out if the ray can't possibly enter the box.
		return false;

	if (rayDirAbs.y > 1e-4f) // ray is not parallel to the plane in question
	{
		tNear = Max(near.y, tNear); // tNear tracks distance to intersect (enter) the AABB.
		tFar = Min(far.y, tFar); // tFar tracks the distance to exit the AABB.
	}
	else if (rayPos.y < aabbMinPoint.y || rayPos.y > aabbMaxPoint.y) // early-out if the ray can't possibly enter the box.
		return false;

	if (rayDirAbs.z > 1e-4f) // ray is not parallel to the plane in question
	{
		tNear = Max(near.z, tNear); // tNear tracks distance to intersect (enter) the AABB.
		tFar = Min(far.z, tFar); // tFar tracks the distance to exit the AABB.
	}
	else if (rayPos.z < aabbMinPoint.z || rayPos.z > aabbMaxPoint.z) // early-out if the ray can't possibly enter the box.
		return false;

	return tNear < tFar;
	*/

	simd4f recipDir = rcp_ps(rayDir.v);
	// Note: The above performs an approximate reciprocal (11 bits of precision).
	// For a full precision reciprocal, perform a div:
	// simd4f recipDir = div_ps(set1_ps(1.f), rayDir.v);

	// Per-axis parameters at which the line crosses the min and max planes.
	simd4f t1 = mul_ps(sub_ps(minPoint, rayPos.v), recipDir);
	simd4f t2 = mul_ps(sub_ps(maxPoint, rayPos.v), recipDir);

	simd4f nearD = min_ps(t1, t2); // [0 n3 n2 n1]
	simd4f farD = max_ps(t1, t2); // [0 f3 f2 f1]

	// Check if the ray direction is parallel to any of the cardinal axes, and if so,
	// mask those [near, far] ranges away from the hit test computations.
	simd4f rayDirAbs = abs_ps(rayDir.v);

	const simd4f epsilon = set1_ps(1e-4f);
	// zeroDirections[i] will be nonzero for each axis i along which the ray direction
	// component is (near) zero, i.e. the ray runs parallel to that slab's planes.
	simd4f zeroDirections = cmple_ps(rayDirAbs, epsilon);

	const simd4f floatInf = set1_ps(FLOAT_INF);
	const simd4f floatNegInf = set1_ps(-FLOAT_INF);

	// If the ray is parallel to one of the axes, replace the slab range for that axis
	// with [-inf, inf] range instead. (which is a no-op in the comparisons below)
	nearD = cmov_ps(nearD, floatNegInf, zeroDirections);
	farD = cmov_ps(farD, floatInf, zeroDirections);

	// Next, we need to compute horizontally max(nearD[0], nearD[1], nearD[2]) and min(farD[0], farD[1], farD[2])
	// to see if there is an overlap in the hit ranges.
	simd4f v1 = axx_bxx_ps(nearD, farD); // [f1 f1 n1 n1]
	simd4f v2 = ayy_byy_ps(nearD, farD); // [f2 f2 n2 n2]
	simd4f v3 = azz_bzz_ps(nearD, farD); // [f3 f3 n3 n3]
	nearD = max_ps(v1, max_ps(v2, v3));
	farD = min_ps(v1, min_ps(v2, v3));
	farD = wwww_ps(farD); // Unpack the result from high offset in the register.
	// Clip the slab interval against the caller's [tNear, tFar] range.
	nearD = max_ps(nearD, setx_ps(tNear));
	farD = min_ps(farD, setx_ps(tFar));

	// Finally, test if the ranges overlap.
	simd4f rangeIntersects = cmple_ps(nearD, farD); // Only x channel used, higher ones ignored.

	// To store out out the interval of intersection, uncomment the following:
	// These are disabled, since without these, the whole function runs without a single memory store,
	// which has been profiled to be very fast! Uncommenting these causes an order-of-magnitude slowdown.
	// For now, using the SSE version only where the tNear and tFar ranges are not interesting.
	// _mm_store_ss(&tNear, nearD);
	// _mm_store_ss(&tFar, farD);

	// To avoid false positives, need to have an additional rejection test for each cardinal axis the ray direction
	// is parallel to.
	simd4f out2 = cmplt_ps(rayPos.v, minPoint);
	simd4f out3 = cmpgt_ps(rayPos.v, maxPoint);
	out2 = or_ps(out2, out3);
	// Keep only the axes that are both parallel and have the origin outside the slab ...
	zeroDirections = and_ps(zeroDirections, out2);

	// ... and fold the y and z lanes into x, so the x lane is set if any axis rejects.
	simd4f yOut = yyyy_ps(zeroDirections);
	simd4f zOut = zzzz_ps(zeroDirections);

	zeroDirections = or_ps(or_ps(zeroDirections, yOut), zOut);
	// Intersection occurs if the slab ranges had positive overlap and if the test was not rejected by the ray being
	// parallel to some cardinal axis.
	simd4f intersects = andnot_ps(zeroDirections, rangeIntersects);
	// Convert the x-lane mask into a bool: epsilon survives the AND only where the mask is all ones.
	simd4f epsilonMasked = and_ps(epsilon, intersects);
	return comieq_ss(epsilon, epsilonMasked) != 0;
}