// Trace the four rays queued in bucket 'msk' of stream 's', copy each lane's
// result into the output record registered for that lane, and mark the bucket
// as empty. The length of each queued direction is passed to the tracer as the
// far limit and is also reported back to the caller as ray_length.
inline void RayTracingEnvironment::FlushStreamEntry(RayStream &s,int msk)
{
	assert(msk>=0);
	assert(msk<8);

	// capture the per-ray lengths before the directions are scaled to unit length
	fltx4 ray_lengths=s.PendingRays[msk].direction.length();
	fltx4 inv_lengths=ReciprocalSaturateSIMD(ray_lengths);
	s.PendingRays[msk].direction*=inv_lengths;

	RayTracingResult packet_result;
	Trace4Rays(s.PendingRays[msk],Four_Zeros,ray_lengths,msk,&packet_result);

	// scatter the packet result into the four per-ray output records
	int lane=0;
	while(lane<4)
	{
		RayTracingSingleResult *dst=s.PendingStreamOutputs[msk][lane];
		dst->HitID=packet_result.HitIds[lane];
		dst->HitDistance=SubFloat( packet_result.HitDistance, lane );
		dst->ray_length=SubFloat( ray_lengths, lane );
		dst->surface_normal.x=packet_result.surface_normal.X(lane);
		dst->surface_normal.y=packet_result.surface_normal.Y(lane);
		dst->surface_normal.z=packet_result.surface_normal.Z(lane);
		lane++;
	}

	// nothing is left pending in this bucket
	s.n_in_stream[msk]=0;
}
void AddEmitSurfaceLights( const Vector &vStart, Vector lightBoxColor[6] )
{
	fltx4 fractionVisible;

	FourVectors vStart4, wlOrigin4;
	vStart4.DuplicateVector ( vStart );

	for ( int iLight=0; iLight < *pNumworldlights; iLight++ )
	{
		dworldlight_t *wl = &dworldlights[iLight];

		// Should this light even go in the ambient cubes?
		if ( !( wl->flags & DWL_FLAGS_INAMBIENTCUBE ) )
			continue;

		Assert( wl->type == emit_surface );

		// Can this light see the point?
		wlOrigin4.DuplicateVector ( wl->origin );
		TestLine ( vStart4, wlOrigin4, &fractionVisible );
		if ( !TestSignSIMD ( CmpGtSIMD ( fractionVisible, Four_Zeros ) ) )
			continue;

		// Add this light's contribution.
		Vector vDelta = wl->origin - vStart;
		float flDistanceScale = Engine_WorldLightDistanceFalloff( wl, vDelta );

		Vector vDeltaNorm = vDelta;
		VectorNormalize( vDeltaNorm );
		float flAngleScale = Engine_WorldLightAngle( wl, wl->normal, vDeltaNorm, vDeltaNorm );

		float ratio = flDistanceScale * flAngleScale * SubFloat ( fractionVisible, 0 );
		if ( ratio == 0 )
			continue;

		for ( int i=0; i < 6; i++ )
		{
			float t = DotProduct( g_BoxDirections[i], vDeltaNorm );
			if ( t > 0 )
			{
				lightBoxColor[i] += wl->intensity * (t * ratio);
			}
		}
	}	
}
// Distributes the lights to render into the m_hPreSortedLights buckets
// (spot/point x world/fullscreen) and recomputes m_bDrawVolumetrics.
// A light is a "fullscreen" candidate when the view origin lies inside the
// light's naive bounds grown by (m_flzNear + 2) on every axis; spot lights
// additionally have their frustum tested against a box of that same size
// centered on the view origin.
// Two implementations are selected at compile time by DEFCFG_USE_SSE: one
// working on pre-packed groups of lights (m_pSortDataX4) and a scalar one
// iterating m_hRenderLights.
// NOTE(review): no matching #endif / closing brace for this function is
// visible before the next definition begins - confirm against the full file.
void CLightingManager::SortLights()
{
#if DEBUG
	// The buckets are expected to be empty on entry.
	for ( int i = 0; i < LSORT_COUNT; i++ )
		Assert( m_hPreSortedLights[ i ].Count() == 0 );
#endif

	m_bDrawVolumetrics = false;

	// Margin added around the light bounds / view origin.
	float zNear = m_flzNear + 2;

	Vector vecBloat( zNear, zNear, zNear );

	// Axis-aligned box around the view origin, used for the spot frustum test.
	Vector camMins( m_vecViewOrigin - vecBloat );
	Vector camMaxs( m_vecViewOrigin + vecBloat );

#if DEFCFG_USE_SSE
	fltx4 zNearX4 = ReplicateX4( zNear );

	for( int i = 0; i < m_uiSortDataCount; i++ )
	{
		def_light_presortdatax4_t& s = m_pSortDataX4[i];

		// Grow the packed bounds by the margin on each axis.
		fltx4 adjustedMins[3] = { SubSIMD( s.bounds_min_naive[0], zNearX4 ),
			SubSIMD( s.bounds_min_naive[1], zNearX4 ),
			SubSIMD( s.bounds_min_naive[2], zNearX4 ) };

		fltx4 adjustedMaxs[3] = { AddSIMD( s.bounds_max_naive[0], zNearX4 ),
			AddSIMD( s.bounds_max_naive[1], zNearX4 ),
			AddSIMD( s.bounds_max_naive[2], zNearX4 ) };

		fltx4 needsFullscreen = IsPointInBoundsX4( m_vecViewOrigin,
			adjustedMins, adjustedMaxs );

		//Jack: this is terrible I know
		// Note: this 'i' shadows the outer loop index; inside this loop it
		// selects the light / SIMD lane within the current group.
		// The lane is tested with _isnan - presumably IsPointInBoundsX4 returns
		// an all-bits-set mask (which reads as NaN) for lanes that are inside
		// the bounds; TODO confirm.
		for( int i = 0; i < s.count; i++ )
		{
			if( s.lights[i]->IsSpot() )
			{
				if( _isnan( SubFloat( needsFullscreen, i ) ) )
				{
					// View origin is inside the bloated bounds: the spot light
					// is fullscreen only if CullBox does not reject the camera box.
					if( !s.lights[i]->spotFrustum.CullBox( camMins, camMaxs ) )
					{
						m_hPreSortedLights[LSORT_SPOT_FULLSCREEN].AddToTail( s.lights[i] );
					}
					else
					{
						m_hPreSortedLights[LSORT_SPOT_WORLD].AddToTail( s.lights[i] );
					}
				}
				else
				{
					m_hPreSortedLights[LSORT_SPOT_WORLD].AddToTail( s.lights[i] );
				}
			}
			else
			{
				// Everything that is not a spot light goes to the point buckets.
				if( _isnan( SubFloat( needsFullscreen, i ) ) )
				{
					m_hPreSortedLights[LSORT_POINT_FULLSCREEN].AddToTail( s.lights[i] );
				}
				else
				{
					m_hPreSortedLights[LSORT_POINT_WORLD].AddToTail( s.lights[i] );
				}
			}
		}

		// NOTE(review): hasVolumetrics is ANDed with itself, which leaves it
		// unchanged; the scalar path below requires HasShadow() as well as
		// HasVolumetrics() - confirm whether a shadow mask was intended here.
		fltx4 volume = AndSIMD( s.hasVolumetrics, s.hasVolumetrics );

		m_bDrawVolumetrics = m_bDrawVolumetrics || !IsAllZeros( volume );
	}
#else
	FOR_EACH_VEC_FAST( def_light_t*, m_hRenderLights, l )
	{
		bool bNeedsFullscreen = IsPointInBounds( m_vecViewOrigin,
			l->bounds_min_naive - vecBloat,
			l->bounds_max_naive + vecBloat );

		// Spot lights stay fullscreen only if CullBox does not reject the camera box.
		if ( bNeedsFullscreen && l->IsSpot() )
		{
			bNeedsFullscreen = !l->spotFrustum.CullBox( camMins, camMaxs );
		}

		const bool bVolume = ( l->HasShadow() && l->HasVolumetrics() );

		m_bDrawVolumetrics = m_bDrawVolumetrics || bVolume;

		// Bucket index: two consecutive slots per light type, the second one
		// being used when the light needs fullscreen handling.
		Assert( l->iLighttype * 2 + 1 < LSORT_COUNT );
		m_hPreSortedLights[ l->iLighttype * 2 + (int)bNeedsFullscreen ].AddToTail( l );
	}
void RayTracingEnvironment::ComputeVirtualLightSources(void)
{
	int start_pos=0;
	for(int b=0;b<3;b++)
	{
		int nl=LightList.Count();
		int where_to_start=start_pos;
		start_pos=nl;
		for(int l=where_to_start;l<nl;l++)
		{
			DirectionalSampler_t sample_generator;
			int n_desired=1*LightList[l].m_Color.Length();
			if (LightList[l].m_Type==MATERIAL_LIGHT_SPOT)
				n_desired*=LightList[l].m_Phi/2;
			for(int try1=0;try1<n_desired;try1++)
			{
				LightDesc_t const &li=LightList[l];
				FourRays myrays;
				myrays.origin.DuplicateVector(li.m_Position);
				RayTracingResult rslt;
				Vector trial_dir=sample_generator.NextValue();
				if (li.IsDirectionWithinLightCone(trial_dir))
				{
					myrays.direction.DuplicateVector(trial_dir);
					Trace4Rays(myrays,all_zeros,ReplicateX4(1000.0), &rslt);
					if ((rslt.HitIds[0]!=-1))
					{
						// make sure normal points back towards ray origin
						fltx4 ndoti=rslt.surface_normal*myrays.direction;
						fltx4 bad_dirs=AndSIMD(CmpGtSIMD(ndoti,Four_Zeros),
												   LoadAlignedSIMD((float *) signmask));
						
						// flip signs of all "wrong" normals
						rslt.surface_normal.x=XorSIMD(bad_dirs,rslt.surface_normal.x);
						rslt.surface_normal.y=XorSIMD(bad_dirs,rslt.surface_normal.y);
						rslt.surface_normal.z=XorSIMD(bad_dirs,rslt.surface_normal.z);

						// a hit! let's make a virtual light source

						// treat the virtual light as a disk with its center at the hit position
						// and its radius scaled by the amount of the solid angle this probe
						// represents.
						float area_of_virtual_light=
							4.0*M_PI*SQ( SubFloat( rslt.HitDistance, 0 ) )*(1.0/n_desired);

						FourVectors intens;
						intens.DuplicateVector(Vector(0,0,0));

						FourVectors surface_pos=myrays.direction;
						surface_pos*=rslt.HitDistance;
						surface_pos+=myrays.origin;
						FourVectors delta=rslt.surface_normal;
						delta*=0.1;
						surface_pos+=delta;
						LightList[l].ComputeLightAtPoints(surface_pos,rslt.surface_normal,
														  intens);
						FourVectors surf_colors;
						surf_colors.DuplicateVector(TriangleColors[rslt.HitIds[0]]);
						intens*=surf_colors;
						// see if significant
						LightDesc_t l1;
						l1.m_Type=MATERIAL_LIGHT_SPOT;
						l1.m_Position=Vector(surface_pos.X(0),surface_pos.Y(0),surface_pos.Z(0));
						l1.m_Direction=Vector(rslt.surface_normal.X(0),rslt.surface_normal.Y(0),
											  rslt.surface_normal.Z(0));
						l1.m_Color=Vector(intens.X(0),intens.Y(0),intens.Z(0));
						if (l1.m_Color.Length()>0)
						{
							l1.m_Color*=area_of_virtual_light/M_PI;
							l1.m_Range=0.0;
							l1.m_Falloff=1.0;
							l1.m_Attenuation0=1.0;
							l1.m_Attenuation1=0.0;
							l1.m_Attenuation2=1.0;			// intens falls off as 1/r^2
							l1.m_Theta=0;
							l1.m_Phi=M_PI;
							l1.RecalculateDerivedValues();
							LightList.AddToTail(l1);
						}
					}
				}
			}
		}
	}
}
void RayTracingEnvironment::Trace4Rays(const FourRays &rays, fltx4 TMin, fltx4 TMax,
									   RayTracingResult *rslt_out,
									   int32 skip_id, ITransparentTriangleCallback *pCallback)
{
	int msk=rays.CalculateDirectionSignMask();
	if (msk!=-1)
		Trace4Rays(rays,TMin,TMax,msk,rslt_out,skip_id, pCallback);
	else
	{
		// sucky case - can't trace 4 rays at once. in the worst case, need to trace all 4
		// separately, but usually we will still get 2x, Since our tracer only does 4 at a
		// time, we will have to cover up the undesired rays with the desired ray

		//!! speed!! there is room for some sse-ization here
		FourRays tmprays;
		tmprays.origin=rays.origin;

		uint8 need_trace[4]={1,1,1,1};
		for(int try_trace=0;try_trace<4;try_trace++)
		{
			if (need_trace[try_trace])
			{
				need_trace[try_trace]=2;			// going to trace it
				// replicate the ray being traced into all 4 rays
				tmprays.direction.x=ReplicateX4(rays.direction.X(try_trace));
				tmprays.direction.y=ReplicateX4(rays.direction.Y(try_trace));
				tmprays.direction.z=ReplicateX4(rays.direction.Z(try_trace));
				// now, see if any of the other remaining rays can be handled at the same time.
				for(int try2=try_trace+1;try2<4;try2++)
					if (need_trace[try2])
					{
						if (
							SameSign(rays.direction.X(try2),
									 rays.direction.X(try_trace)) &&
							SameSign(rays.direction.Y(try2),
									 rays.direction.Y(try_trace)) &&
							SameSign(rays.direction.Z(try2),
									 rays.direction.Z(try_trace)))
						{
							need_trace[try2]=2;
							tmprays.direction.X(try2) = rays.direction.X(try2);
							tmprays.direction.Y(try2) = rays.direction.Y(try2);
							tmprays.direction.Z(try2) = rays.direction.Z(try2);
						}
					}
				// ok, now trace between 1 and 3 rays, and output the results
				RayTracingResult tmpresults;
				msk=tmprays.CalculateDirectionSignMask();
				assert(msk!=-1);
				Trace4Rays(tmprays,TMin,TMax,msk,&tmpresults,skip_id, pCallback);
				// now, move results to proper place
				for(int i=0;i<4;i++)
					if (need_trace[i]==2)
					{
						need_trace[i]=0;
						rslt_out->HitIds[i]=tmpresults.HitIds[i];
						SubFloat(rslt_out->HitDistance, i) = SubFloat(tmpresults.HitDistance, i);
						rslt_out->surface_normal.X(i) = tmpresults.surface_normal.X(i);
						rslt_out->surface_normal.Y(i) = tmpresults.surface_normal.Y(i);
						rslt_out->surface_normal.Z(i) = tmpresults.surface_normal.Z(i);
					}
				
			}
		}
	}
}