// Converts four world-space positions into luxel-space (s,t) coordinates for
// the face described by l.
//   l     - supplies luxelOrigin, the worldToLuxelSpace axes and the face's
//           m_LightmapTextureMinsInLuxels offsets
//   world - four input positions
//   coord - receives s in x and t in y; z is set to zero
void WorldToLuxelSpace( lightinfo_t const *l, FourVectors const &world, FourVectors &coord )
{
	// Express the input positions relative to the luxel origin. The copy is
	// taken before coord is written.
	FourVectors origin4;
	origin4.DuplicateVector( l->luxelOrigin );

	FourVectors relative = world;
	relative -= origin4;

	const fltx4 minS = ReplicateX4( l->face->m_LightmapTextureMinsInLuxels[0] );
	const fltx4 minT = ReplicateX4( l->face->m_LightmapTextureMinsInLuxels[1] );

	// Dot with each luxel axis, then subtract the face's lightmap mins.
	coord.x = SubSIMD( relative * l->worldToLuxelSpace[0], minS );
	coord.y = SubSIMD( relative * l->worldToLuxelSpace[1], minT );
	coord.z = Four_Zeros;
}
// Converts four luxel-space (s,t) pairs into world-space positions.
//   l     - supplies luxelOrigin, the luxelToWorldSpace axes and the face's
//           m_LightmapTextureMinsInLuxels offsets
//   s, t  - four luxel coordinates each; the face's lightmap mins are added
//           to them before use
//   world - receives luxelOrigin + axis0 * s + axis1 * t
void LuxelSpaceToWorld( lightinfo_t const *l, fltx4 s, fltx4 t, FourVectors &world )
{
	const fltx4 offsetS = AddSIMD( s, ReplicateX4( l->face->m_LightmapTextureMinsInLuxels[0] ) );
	const fltx4 offsetT = AddSIMD( t, ReplicateX4( l->face->m_LightmapTextureMinsInLuxels[1] ) );

	// Contribution along the first luxel axis.
	FourVectors alongS;
	alongS.DuplicateVector( l->luxelToWorldSpace[0] );
	alongS *= offsetS;

	// Contribution along the second luxel axis.
	FourVectors alongT;
	alongT.DuplicateVector( l->luxelToWorldSpace[1] );
	alongT *= offsetT;

	// Accumulate in the order origin, s term, t term.
	world.DuplicateVector( l->luxelOrigin );
	world += alongS;
	world += alongT;
}
// Applies VProduct with src (replicated to all four lanes) to every
// FourVectors element stored in this matrix.
// Returns *this so the operation can be chained.
CSIMDVectorMatrix & CSIMDVectorMatrix::operator*=( Vector const &src )
{
	int remaining = NVectors();
	if ( remaining != 0 )
	{
		FourVectors scale4;
		scale4.DuplicateVector( src );

		// !! speed !! inline more iters
		for ( FourVectors *cur = m_pData; remaining != 0; --remaining, ++cur )
		{
			cur->VProduct( scale4 );
		}
	}
	return *this;
}
void LightDesc_t::ComputeLightAtPointsForDirectional( const FourVectors &pos, const FourVectors &normal, FourVectors &color, bool DoHalfLambert ) const { FourVectors delta; delta.DuplicateVector(m_Direction); // delta.VectorNormalizeFast(); fltx4 strength=delta*normal; if (DoHalfLambert) { strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives); } else strength=MaxSIMD(Four_Zeros,delta*normal); color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(m_Color.x))); color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(m_Color.y))); color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(m_Color.z))); }
void LightDesc_t::ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal, FourVectors &color, bool DoHalfLambert ) const { FourVectors delta; Assert((m_Type==MATERIAL_LIGHT_POINT) || (m_Type==MATERIAL_LIGHT_SPOT) || (m_Type==MATERIAL_LIGHT_DIRECTIONAL)); switch (m_Type) { case MATERIAL_LIGHT_POINT: case MATERIAL_LIGHT_SPOT: delta.DuplicateVector(m_Position); delta-=pos; break; case MATERIAL_LIGHT_DIRECTIONAL: delta.DuplicateVector(m_Direction); delta*=-1.0; break; default: delta.x = Four_Zeros; delta.y = Four_Zeros; delta.z = Four_Zeros; break; } __m128 dist2 = delta*delta; __m128 falloff; if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 ) { falloff = MMReplicate(m_Attenuation0); } else falloff= Four_Epsilons; if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 ) { falloff=_mm_add_ps(falloff,_mm_mul_ps(MMReplicate(m_Attenuation1),_mm_sqrt_ps(dist2))); } if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 ) { falloff=_mm_add_ps(falloff,_mm_mul_ps(MMReplicate(m_Attenuation2),dist2)); } falloff=_mm_rcp_ps(falloff); // Cull out light beyond this radius // now, zero out elements for which dist2 was > range^2. !!speed!! lights should store dist^2 in sse format if (m_Range != 0.f) { __m128 RangeSquared=MMReplicate(m_RangeSquared); // !!speed!! 
falloff=_mm_and_ps(falloff,_mm_cmplt_ps(dist2,RangeSquared)); } delta.VectorNormalizeFast(); __m128 strength=delta*normal; if (DoHalfLambert) { strength=_mm_add_ps(_mm_mul_ps(strength,Four_PointFives),Four_PointFives); } else strength=_mm_max_ps(Four_Zeros,delta*normal); switch(m_Type) { case MATERIAL_LIGHT_POINT: // half-lambert break; case MATERIAL_LIGHT_SPOT: { __m128 dot2=_mm_sub_ps(Four_Zeros,delta*m_Direction); // dot position with spot light dir for cone falloff __m128 cone_falloff_scale=_mm_mul_ps(MMReplicate(OneOver_ThetaDot_Minus_PhiDot), _mm_sub_ps(dot2,MMReplicate(m_PhiDot))); cone_falloff_scale=_mm_min_ps(cone_falloff_scale,Four_Ones); if ((m_Falloff!=0.0) && (m_Falloff!=1.0)) { // !!speed!! could compute integer exponent needed by powsse and store in light cone_falloff_scale=PowSSE(cone_falloff_scale,m_Falloff); } strength=_mm_mul_ps(cone_falloff_scale,strength); // now, zero out lighting where dot2<phidot. This will mask out any invalid results // from pow function, etc __m128 OutsideMask=_mm_cmpgt_ps(dot2,MMReplicate(m_PhiDot)); // outside light cone? strength=_mm_and_ps(OutsideMask,strength); } break; case MATERIAL_LIGHT_DIRECTIONAL: break; default: break; } strength=_mm_mul_ps(strength,falloff); color.x=_mm_add_ps(color.x,_mm_mul_ps(strength,MMReplicate(m_Color.x))); color.y=_mm_add_ps(color.y,_mm_mul_ps(strength,MMReplicate(m_Color.y))); color.z=_mm_add_ps(color.z,_mm_mul_ps(strength,MMReplicate(m_Color.z))); }
void RayTracingEnvironment::RenderScene( int width, int height, // width and height of desired rendering int stride, // actual width in pixels of target buffer uint32 *output_buffer, // pointer to destination Vector CameraOrigin, // eye position Vector ULCorner, // word space coordinates of upper left // monitor corner Vector URCorner, // top right corner Vector LLCorner, // lower left Vector LRCorner, // lower right RayTraceLightingMode_t lmode) { // first, compute deltas Vector dxvector=URCorner; dxvector-=ULCorner; dxvector*=(1.0/width); Vector dxvectortimes2=dxvector; dxvectortimes2+=dxvector; Vector dyvector=LLCorner; dyvector-=ULCorner; dyvector*=(1.0/height); // block_offsets-relative offsets for eahc of the 4 pixels in the block, in sse format FourVectors block_offsets; block_offsets.LoadAndSwizzle(Vector(0,0,0),dxvector,dyvector,dxvector+dyvector); FourRays myrays; myrays.origin.DuplicateVector(CameraOrigin); // tmprays is used fo rthe case when we cannot trace 4 rays at once. FourRays tmprays; tmprays.origin.DuplicateVector(CameraOrigin); // now, we will ray trace pixels. 
we will do the rays in a 2x2 pattern for(int y=0;y<height;y+=2) { Vector SLoc=dyvector; SLoc*=((float) y); SLoc+=ULCorner; uint32 *dest=output_buffer+y*stride; for(int x=0;x<width;x+=2) { myrays.direction.DuplicateVector(SLoc); myrays.direction+=block_offsets; myrays.direction.VectorNormalize(); RayTracingResult rslt; Trace4Rays(myrays,all_zeros,TraceLimit, &rslt); if ((rslt.HitIds[0]==-1) && (rslt.HitIds[1]==-1) && (rslt.HitIds[2]==-1) && (rslt.HitIds[3]==-1)) MapLinearIntensities(BackgroundColor,dest,dest+1,dest+stride,dest+stride+1); else { // make sure normal points back towards ray origin fltx4 ndoti=rslt.surface_normal*myrays.direction; fltx4 bad_dirs=AndSIMD(CmpGtSIMD(ndoti,Four_Zeros), LoadAlignedSIMD((float *) signmask)); // flip signs of all "wrong" normals rslt.surface_normal.x=XorSIMD(bad_dirs,rslt.surface_normal.x); rslt.surface_normal.y=XorSIMD(bad_dirs,rslt.surface_normal.y); rslt.surface_normal.z=XorSIMD(bad_dirs,rslt.surface_normal.z); FourVectors intens; intens.DuplicateVector(Vector(0,0,0)); // set up colors FourVectors surf_colors; surf_colors.DuplicateVector(Vector(0,0,0)); for(int i=0;i<4;i++) { if (rslt.HitIds[i]>=0) { surf_colors.X(i)=TriangleColors[rslt.HitIds[i]].x; surf_colors.Y(i)=TriangleColors[rslt.HitIds[i]].y; surf_colors.Z(i)=TriangleColors[rslt.HitIds[i]].z; } } FourVectors surface_pos=myrays.direction; surface_pos*=rslt.HitDistance; surface_pos+=myrays.origin; switch(lmode) { case DIRECT_LIGHTING: { // light all points for(int l=0;l<LightList.Count();l++) { LightList[l].ComputeLightAtPoints(surface_pos,rslt.surface_normal, intens); } } break; case DIRECT_LIGHTING_WITH_SHADOWS: { // light all points for(int l=0;l<LightList.Count();l++) { FourVectors ldir; ldir.DuplicateVector(LightList[l].m_Position); ldir-=surface_pos; fltx4 MaxT=ldir.length(); ldir.VectorNormalizeFast(); // now, compute shadow flag FourRays myrays; myrays.origin=surface_pos; FourVectors epsilon=ldir; epsilon*=0.01; myrays.origin+=epsilon; myrays.direction=ldir; 
RayTracingResult shadowtest; Trace4Rays(myrays,Four_Zeros,MaxT, &shadowtest); fltx4 unshadowed=CmpGtSIMD(shadowtest.HitDistance,MaxT); if (! (IsAllZeros(unshadowed))) { FourVectors tmp; tmp.DuplicateVector(Vector(0,0,0)); LightList[l].ComputeLightAtPoints(surface_pos,rslt.surface_normal, tmp); intens.x=AddSIMD(intens.x,AndSIMD(tmp.x,unshadowed)); intens.y=AddSIMD(intens.y,AndSIMD(tmp.y,unshadowed)); intens.z=AddSIMD(intens.z,AndSIMD(tmp.z,unshadowed)); } } } break; } // now, mask off non-hitting pixels intens.VProduct(surf_colors); fltx4 no_hit_mask=CmpGtSIMD(rslt.HitDistance,TraceLimit); intens.x=OrSIMD(AndSIMD(BackgroundColor.x,no_hit_mask), AndNotSIMD(no_hit_mask,intens.x)); intens.y=OrSIMD(AndSIMD(BackgroundColor.y,no_hit_mask), AndNotSIMD(no_hit_mask,intens.y)); intens.z=OrSIMD(AndSIMD(BackgroundColor.y,no_hit_mask), AndNotSIMD(no_hit_mask,intens.z)); MapLinearIntensities(intens,dest,dest+1,dest+stride,dest+stride+1); } dest+=2; SLoc+=dxvectortimes2; } } }
void RayTracingEnvironment::ComputeVirtualLightSources(void) { int start_pos=0; for(int b=0;b<3;b++) { int nl=LightList.Count(); int where_to_start=start_pos; start_pos=nl; for(int l=where_to_start;l<nl;l++) { DirectionalSampler_t sample_generator; int n_desired=1*LightList[l].m_Color.Length(); if (LightList[l].m_Type==MATERIAL_LIGHT_SPOT) n_desired*=LightList[l].m_Phi/2; for(int try1=0;try1<n_desired;try1++) { LightDesc_t const &li=LightList[l]; FourRays myrays; myrays.origin.DuplicateVector(li.m_Position); RayTracingResult rslt; Vector trial_dir=sample_generator.NextValue(); if (li.IsDirectionWithinLightCone(trial_dir)) { myrays.direction.DuplicateVector(trial_dir); Trace4Rays(myrays,all_zeros,ReplicateX4(1000.0), &rslt); if ((rslt.HitIds[0]!=-1)) { // make sure normal points back towards ray origin fltx4 ndoti=rslt.surface_normal*myrays.direction; fltx4 bad_dirs=AndSIMD(CmpGtSIMD(ndoti,Four_Zeros), LoadAlignedSIMD((float *) signmask)); // flip signs of all "wrong" normals rslt.surface_normal.x=XorSIMD(bad_dirs,rslt.surface_normal.x); rslt.surface_normal.y=XorSIMD(bad_dirs,rslt.surface_normal.y); rslt.surface_normal.z=XorSIMD(bad_dirs,rslt.surface_normal.z); // a hit! let's make a virtual light source // treat the virtual light as a disk with its center at the hit position // and its radius scaled by the amount of the solid angle this probe // represents. 
float area_of_virtual_light= 4.0*M_PI*SQ( SubFloat( rslt.HitDistance, 0 ) )*(1.0/n_desired); FourVectors intens; intens.DuplicateVector(Vector(0,0,0)); FourVectors surface_pos=myrays.direction; surface_pos*=rslt.HitDistance; surface_pos+=myrays.origin; FourVectors delta=rslt.surface_normal; delta*=0.1; surface_pos+=delta; LightList[l].ComputeLightAtPoints(surface_pos,rslt.surface_normal, intens); FourVectors surf_colors; surf_colors.DuplicateVector(TriangleColors[rslt.HitIds[0]]); intens*=surf_colors; // see if significant LightDesc_t l1; l1.m_Type=MATERIAL_LIGHT_SPOT; l1.m_Position=Vector(surface_pos.X(0),surface_pos.Y(0),surface_pos.Z(0)); l1.m_Direction=Vector(rslt.surface_normal.X(0),rslt.surface_normal.Y(0), rslt.surface_normal.Z(0)); l1.m_Color=Vector(intens.X(0),intens.Y(0),intens.Z(0)); if (l1.m_Color.Length()>0) { l1.m_Color*=area_of_virtual_light/M_PI; l1.m_Range=0.0; l1.m_Falloff=1.0; l1.m_Attenuation0=1.0; l1.m_Attenuation1=0.0; l1.m_Attenuation2=1.0; // intens falls off as 1/r^2 l1.m_Theta=0; l1.m_Phi=M_PI; l1.RecalculateDerivedValues(); LightList.AddToTail(l1); } } } } } } }
void LightDesc_t::ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal, FourVectors &color, bool DoHalfLambert ) const { FourVectors delta; Assert((m_Type==MATERIAL_LIGHT_POINT) || (m_Type==MATERIAL_LIGHT_SPOT) || (m_Type==MATERIAL_LIGHT_DIRECTIONAL)); switch (m_Type) { case MATERIAL_LIGHT_POINT: case MATERIAL_LIGHT_SPOT: delta.DuplicateVector(m_Position); delta-=pos; break; case MATERIAL_LIGHT_DIRECTIONAL: ComputeLightAtPointsForDirectional( pos, normal, color, DoHalfLambert ); return; default: return; } fltx4 dist2 = delta*delta; dist2=MaxSIMD( Four_Ones, dist2 ); fltx4 falloff; if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 ) { falloff = ReplicateX4(m_Attenuation0); } else falloff= Four_Epsilons; if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 ) { falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation1),SqrtEstSIMD(dist2))); } if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 ) { falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation2),dist2)); } falloff=ReciprocalEstSIMD(falloff); // Cull out light beyond this radius // now, zero out elements for which dist2 was > range^2. !!speed!! lights should store dist^2 in sse format if (m_Range != 0.f) { fltx4 RangeSquared=ReplicateX4(m_RangeSquared); // !!speed!! falloff=AndSIMD(falloff,CmpLtSIMD(dist2,RangeSquared)); } delta.VectorNormalizeFast(); fltx4 strength=delta*normal; if (DoHalfLambert) { strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives); } else strength=MaxSIMD(Four_Zeros,delta*normal); switch(m_Type) { case MATERIAL_LIGHT_POINT: // half-lambert break; case MATERIAL_LIGHT_SPOT: { fltx4 dot2=SubSIMD(Four_Zeros,delta*m_Direction); // dot position with spot light dir for cone falloff fltx4 cone_falloff_scale=MulSIMD(ReplicateX4(m_OneOverThetaDotMinusPhiDot), SubSIMD(dot2,ReplicateX4(m_PhiDot))); cone_falloff_scale=MinSIMD(cone_falloff_scale,Four_Ones); if ((m_Falloff!=0.0) && (m_Falloff!=1.0)) { // !!speed!! 
could compute integer exponent needed by powsimd and store in light cone_falloff_scale=PowSIMD(cone_falloff_scale,m_Falloff); } strength=MulSIMD(cone_falloff_scale,strength); // now, zero out lighting where dot2<phidot. This will mask out any invalid results // from pow function, etc fltx4 OutsideMask=CmpGtSIMD(dot2,ReplicateX4(m_PhiDot)); // outside light cone? strength=AndSIMD(OutsideMask,strength); } break; default: break; } strength=MulSIMD(strength,falloff); color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(m_Color.x))); color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(m_Color.y))); color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(m_Color.z))); }