// Tests a single point against the bounds packed in boundsMin / boundsMax.
// Index 0, 1 and 2 of each array hold the x, y and z bounds respectively.
// The point is broadcast across a SIMD register per axis, compared with
// >= min and <= max, and the six comparison masks are AND-ed together.
// Returns the combined comparison mask (one result per SIMD element).
inline fltx4 IsPointInBoundsX4( const Vector point, fltx4 boundsMin[3], fltx4 boundsMax[3] )
{
	// broadcast each coordinate of the point
	fltx4 px = ReplicateX4( point.x );
	fltx4 py = ReplicateX4( point.y );
	fltx4 pz = ReplicateX4( point.z );

	// per-axis "min <= p <= max" masks
	fltx4 insideX = AndSIMD( CmpGeSIMD( px, boundsMin[0] ), CmpLeSIMD( px, boundsMax[0] ) );
	fltx4 insideY = AndSIMD( CmpGeSIMD( py, boundsMin[1] ), CmpLeSIMD( py, boundsMax[1] ) );
	fltx4 insideZ = AndSIMD( CmpGeSIMD( pz, boundsMin[2] ), CmpLeSIMD( pz, boundsMax[2] ) );

	// inside only when all three axes agree
	return AndSIMD( AndSIMD( insideX, insideY ), insideZ );
}
void MapLinearIntensities(FourVectors const &intens,uint32 *p1, uint32 *p2, uint32 *p3, uint32 *p4) { // convert four pixels worth of sse-style rgb into argb lwords // NOTE the _mm_empty macro is voodoo. do not mess with this routine casually - simply throwing // anything that ends up generating a fpu stack references in here would be bad news. static fltx4 pixscale={255.0,255.0,255.0,255.0}; fltx4 r,g,b; r=MinSIMD(pixscale,MulSIMD(pixscale,PowSIMD(intens.x,IGAMMA))); g=MinSIMD(pixscale,MulSIMD(pixscale,PowSIMD(intens.y,IGAMMA))); b=MinSIMD(pixscale,MulSIMD(pixscale,PowSIMD(intens.z,IGAMMA))); // now, convert to integer r=AndSIMD( AddSIMD( r, Four_MagicNumbers ), PIXMASK ); g=AndSIMD( AddSIMD( g, Four_MagicNumbers ), PIXMASK ); b=AndSIMD( AddSIMD( b, Four_MagicNumbers ), PIXMASK ); *(p1)=(SubInt(r, 0))|(SubInt(g, 0)<<8)|(SubInt(b, 0)<<16); *(p2)=(SubInt(r, 1))|(SubInt(g, 1)<<8)|(SubInt(b, 1)<<16); *(p3)=(SubInt(r, 2))|(SubInt(g, 2)<<8)|(SubInt(b, 2)<<16); *(p4)=(SubInt(r, 3))|(SubInt(g, 3)<<8)|(SubInt(b, 3)<<16); }
// Produces the next four random values of this stream.
// The new value is rand[k] + rand[j], wrapped back below 1.0, and is stored
// over rand[k]. Both cursors then step backwards through m_RandY, wrapping
// from the first element to the element at index 54.
inline fltx4 RandSIMD( void )
{
	// ret = rand[k] + rand[j]
	fltx4 retval=AddSIMD( *m_pRand_K, *m_pRand_J );

	// if ( ret >= 1.0 ) ret -= 1.0
	fltx4 overflow_mask=CmpGeSIMD( retval, Four_Ones );
	retval=SubSIMD( retval, AndSIMD( Four_Ones, overflow_mask ) );
	*m_pRand_K = retval;

	// Step the cursors with wrap-around. Test before decrementing so that a
	// pointer to "one element before m_RandY" is never formed (that would be
	// undefined behaviour even if it is only compared).
	if ( m_pRand_J == m_RandY )
		m_pRand_J=m_RandY+54;
	else
		--m_pRand_J;

	if ( m_pRand_K == m_RandY )
		m_pRand_K=m_RandY+54;
	else
		--m_pRand_K;

	return retval;
}
void RayTracingEnvironment::RenderScene( int width, int height, // width and height of desired rendering int stride, // actual width in pixels of target buffer uint32 *output_buffer, // pointer to destination Vector CameraOrigin, // eye position Vector ULCorner, // word space coordinates of upper left // monitor corner Vector URCorner, // top right corner Vector LLCorner, // lower left Vector LRCorner, // lower right RayTraceLightingMode_t lmode) { // first, compute deltas Vector dxvector=URCorner; dxvector-=ULCorner; dxvector*=(1.0/width); Vector dxvectortimes2=dxvector; dxvectortimes2+=dxvector; Vector dyvector=LLCorner; dyvector-=ULCorner; dyvector*=(1.0/height); // block_offsets-relative offsets for eahc of the 4 pixels in the block, in sse format FourVectors block_offsets; block_offsets.LoadAndSwizzle(Vector(0,0,0),dxvector,dyvector,dxvector+dyvector); FourRays myrays; myrays.origin.DuplicateVector(CameraOrigin); // tmprays is used fo rthe case when we cannot trace 4 rays at once. FourRays tmprays; tmprays.origin.DuplicateVector(CameraOrigin); // now, we will ray trace pixels. 
we will do the rays in a 2x2 pattern for(int y=0;y<height;y+=2) { Vector SLoc=dyvector; SLoc*=((float) y); SLoc+=ULCorner; uint32 *dest=output_buffer+y*stride; for(int x=0;x<width;x+=2) { myrays.direction.DuplicateVector(SLoc); myrays.direction+=block_offsets; myrays.direction.VectorNormalize(); RayTracingResult rslt; Trace4Rays(myrays,all_zeros,TraceLimit, &rslt); if ((rslt.HitIds[0]==-1) && (rslt.HitIds[1]==-1) && (rslt.HitIds[2]==-1) && (rslt.HitIds[3]==-1)) MapLinearIntensities(BackgroundColor,dest,dest+1,dest+stride,dest+stride+1); else { // make sure normal points back towards ray origin fltx4 ndoti=rslt.surface_normal*myrays.direction; fltx4 bad_dirs=AndSIMD(CmpGtSIMD(ndoti,Four_Zeros), LoadAlignedSIMD((float *) signmask)); // flip signs of all "wrong" normals rslt.surface_normal.x=XorSIMD(bad_dirs,rslt.surface_normal.x); rslt.surface_normal.y=XorSIMD(bad_dirs,rslt.surface_normal.y); rslt.surface_normal.z=XorSIMD(bad_dirs,rslt.surface_normal.z); FourVectors intens; intens.DuplicateVector(Vector(0,0,0)); // set up colors FourVectors surf_colors; surf_colors.DuplicateVector(Vector(0,0,0)); for(int i=0;i<4;i++) { if (rslt.HitIds[i]>=0) { surf_colors.X(i)=TriangleColors[rslt.HitIds[i]].x; surf_colors.Y(i)=TriangleColors[rslt.HitIds[i]].y; surf_colors.Z(i)=TriangleColors[rslt.HitIds[i]].z; } } FourVectors surface_pos=myrays.direction; surface_pos*=rslt.HitDistance; surface_pos+=myrays.origin; switch(lmode) { case DIRECT_LIGHTING: { // light all points for(int l=0;l<LightList.Count();l++) { LightList[l].ComputeLightAtPoints(surface_pos,rslt.surface_normal, intens); } } break; case DIRECT_LIGHTING_WITH_SHADOWS: { // light all points for(int l=0;l<LightList.Count();l++) { FourVectors ldir; ldir.DuplicateVector(LightList[l].m_Position); ldir-=surface_pos; fltx4 MaxT=ldir.length(); ldir.VectorNormalizeFast(); // now, compute shadow flag FourRays myrays; myrays.origin=surface_pos; FourVectors epsilon=ldir; epsilon*=0.01; myrays.origin+=epsilon; myrays.direction=ldir; 
RayTracingResult shadowtest; Trace4Rays(myrays,Four_Zeros,MaxT, &shadowtest); fltx4 unshadowed=CmpGtSIMD(shadowtest.HitDistance,MaxT); if (! (IsAllZeros(unshadowed))) { FourVectors tmp; tmp.DuplicateVector(Vector(0,0,0)); LightList[l].ComputeLightAtPoints(surface_pos,rslt.surface_normal, tmp); intens.x=AddSIMD(intens.x,AndSIMD(tmp.x,unshadowed)); intens.y=AddSIMD(intens.y,AndSIMD(tmp.y,unshadowed)); intens.z=AddSIMD(intens.z,AndSIMD(tmp.z,unshadowed)); } } } break; } // now, mask off non-hitting pixels intens.VProduct(surf_colors); fltx4 no_hit_mask=CmpGtSIMD(rslt.HitDistance,TraceLimit); intens.x=OrSIMD(AndSIMD(BackgroundColor.x,no_hit_mask), AndNotSIMD(no_hit_mask,intens.x)); intens.y=OrSIMD(AndSIMD(BackgroundColor.y,no_hit_mask), AndNotSIMD(no_hit_mask,intens.y)); intens.z=OrSIMD(AndSIMD(BackgroundColor.y,no_hit_mask), AndNotSIMD(no_hit_mask,intens.z)); MapLinearIntensities(intens,dest,dest+1,dest+stride,dest+stride+1); } dest+=2; SLoc+=dxvectortimes2; } } }
void RayTracingEnvironment::ComputeVirtualLightSources(void) { int start_pos=0; for(int b=0;b<3;b++) { int nl=LightList.Count(); int where_to_start=start_pos; start_pos=nl; for(int l=where_to_start;l<nl;l++) { DirectionalSampler_t sample_generator; int n_desired=1*LightList[l].m_Color.Length(); if (LightList[l].m_Type==MATERIAL_LIGHT_SPOT) n_desired*=LightList[l].m_Phi/2; for(int try1=0;try1<n_desired;try1++) { LightDesc_t const &li=LightList[l]; FourRays myrays; myrays.origin.DuplicateVector(li.m_Position); RayTracingResult rslt; Vector trial_dir=sample_generator.NextValue(); if (li.IsDirectionWithinLightCone(trial_dir)) { myrays.direction.DuplicateVector(trial_dir); Trace4Rays(myrays,all_zeros,ReplicateX4(1000.0), &rslt); if ((rslt.HitIds[0]!=-1)) { // make sure normal points back towards ray origin fltx4 ndoti=rslt.surface_normal*myrays.direction; fltx4 bad_dirs=AndSIMD(CmpGtSIMD(ndoti,Four_Zeros), LoadAlignedSIMD((float *) signmask)); // flip signs of all "wrong" normals rslt.surface_normal.x=XorSIMD(bad_dirs,rslt.surface_normal.x); rslt.surface_normal.y=XorSIMD(bad_dirs,rslt.surface_normal.y); rslt.surface_normal.z=XorSIMD(bad_dirs,rslt.surface_normal.z); // a hit! let's make a virtual light source // treat the virtual light as a disk with its center at the hit position // and its radius scaled by the amount of the solid angle this probe // represents. 
float area_of_virtual_light= 4.0*M_PI*SQ( SubFloat( rslt.HitDistance, 0 ) )*(1.0/n_desired); FourVectors intens; intens.DuplicateVector(Vector(0,0,0)); FourVectors surface_pos=myrays.direction; surface_pos*=rslt.HitDistance; surface_pos+=myrays.origin; FourVectors delta=rslt.surface_normal; delta*=0.1; surface_pos+=delta; LightList[l].ComputeLightAtPoints(surface_pos,rslt.surface_normal, intens); FourVectors surf_colors; surf_colors.DuplicateVector(TriangleColors[rslt.HitIds[0]]); intens*=surf_colors; // see if significant LightDesc_t l1; l1.m_Type=MATERIAL_LIGHT_SPOT; l1.m_Position=Vector(surface_pos.X(0),surface_pos.Y(0),surface_pos.Z(0)); l1.m_Direction=Vector(rslt.surface_normal.X(0),rslt.surface_normal.Y(0), rslt.surface_normal.Z(0)); l1.m_Color=Vector(intens.X(0),intens.Y(0),intens.Z(0)); if (l1.m_Color.Length()>0) { l1.m_Color*=area_of_virtual_light/M_PI; l1.m_Range=0.0; l1.m_Falloff=1.0; l1.m_Attenuation0=1.0; l1.m_Attenuation1=0.0; l1.m_Attenuation2=1.0; // intens falls off as 1/r^2 l1.m_Theta=0; l1.m_Phi=M_PI; l1.RecalculateDerivedValues(); LightList.AddToTail(l1); } } } } } } }
// Evaluates lattice noise at four points at once.
//   x, y, z - the coordinates of the four sample points, one point per SIMD element
// Each coordinate is converted to a fixed-point integer: the low 8 bits give
// the interpolation fraction (in 1/256 steps) and the remaining bits select the
// lattice cell. The eight surrounding lattice values are fetched per point and
// blended with trilinear interpolation.
// Returns 2 * ( interpolated - 0.5 ) for each point.
fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z )
{
	// use magic to convert to integer index
	fltx4 x_idx = AndSIMD( MASK255, AddSIMD( x, Four_MagicNumbers ) );
	fltx4 y_idx = AndSIMD( MASK255, AddSIMD( y, Four_MagicNumbers ) );
	fltx4 z_idx = AndSIMD( MASK255, AddSIMD( z, Four_MagicNumbers ) );

	fltx4 lattice000 = Four_Zeros, lattice001 = Four_Zeros, lattice010 = Four_Zeros, lattice011 = Four_Zeros;
	fltx4 lattice100 = Four_Zeros, lattice101 = Four_Zeros, lattice110 = Four_Zeros, lattice111 = Four_Zeros;

	// FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
	//		  Converting the indexed noise values back to vectors will cause more (128 bytes)
	//		  The noise table could store vectors if we chunked it into 2x2x2 blocks.
	fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;

	// Gather fractions and lattice corners for each of the four points. This is
	// a plain loop rather than a helper macro so that no macro name outlives
	// the function.
	for ( int i = 0; i < 4; i++ )
	{
		unsigned int xi = SubInt( x_idx, i );
		unsigned int yi = SubInt( y_idx, i );
		unsigned int zi = SubInt( z_idx, i );

		// low 8 bits: position inside the cell
		SubFloat( xfrac, i ) = (xi & 0xff)*(1.0/256.0);
		SubFloat( yfrac, i ) = (yi & 0xff)*(1.0/256.0);
		SubFloat( zfrac, i ) = (zi & 0xff)*(1.0/256.0);

		// remaining bits: cell coordinates
		xi>>=8;
		yi>>=8;
		zi>>=8;

		SubFloat( lattice000, i ) = GetLatticePointValue( xi,yi,zi );
		SubFloat( lattice001, i ) = GetLatticePointValue( xi,yi,zi+1 );
		SubFloat( lattice010, i ) = GetLatticePointValue( xi,yi+1,zi );
		SubFloat( lattice011, i ) = GetLatticePointValue( xi,yi+1,zi+1 );
		SubFloat( lattice100, i ) = GetLatticePointValue( xi+1,yi,zi );
		SubFloat( lattice101, i ) = GetLatticePointValue( xi+1,yi,zi+1 );
		SubFloat( lattice110, i ) = GetLatticePointValue( xi+1,yi+1,zi );
		SubFloat( lattice111, i ) = GetLatticePointValue( xi+1,yi+1,zi+1 );
	}

	// now, we have 8 lattice values for each of four points as m128s, and interpolant values for
	// each axis in m128 form in [xyz]frac. Perform the trilinear interpolation as SIMD ops

	// first, do x interpolation
	fltx4 l2d00 = AddSIMD( lattice000, MulSIMD( xfrac, SubSIMD( lattice100, lattice000 ) ) );
	fltx4 l2d01 = AddSIMD( lattice001, MulSIMD( xfrac, SubSIMD( lattice101, lattice001 ) ) );
	fltx4 l2d10 = AddSIMD( lattice010, MulSIMD( xfrac, SubSIMD( lattice110, lattice010 ) ) );
	fltx4 l2d11 = AddSIMD( lattice011, MulSIMD( xfrac, SubSIMD( lattice111, lattice011 ) ) );

	// now, do y interpolation
	fltx4 l1d0 = AddSIMD( l2d00, MulSIMD( yfrac, SubSIMD( l2d10, l2d00 ) ) );
	fltx4 l1d1 = AddSIMD( l2d01, MulSIMD( yfrac, SubSIMD( l2d11, l2d01 ) ) );

	// final z interpolation
	fltx4 rslt = AddSIMD( l1d0, MulSIMD( zfrac, SubSIMD( l1d1, l1d0 ) ) );

	// re-centre and scale: 2 * ( rslt - 0.5 ), i.e. a 0..1 value becomes -1..1
	return MulSIMD( Four_Twos, SubSIMD( rslt, Four_PointFives ) );
}
void CLightingManager::SortLights() { #if DEBUG for ( int i = 0; i < LSORT_COUNT; i++ ) Assert( m_hPreSortedLights[ i ].Count() == 0 ); #endif m_bDrawVolumetrics = false; float zNear = m_flzNear + 2; Vector vecBloat( zNear, zNear, zNear ); Vector camMins( m_vecViewOrigin - vecBloat ); Vector camMaxs( m_vecViewOrigin + vecBloat ); #if DEFCFG_USE_SSE fltx4 zNearX4 = ReplicateX4( zNear ); for( int i = 0; i < m_uiSortDataCount; i++ ) { def_light_presortdatax4_t& s = m_pSortDataX4[i]; fltx4 adjustedMins[3] = { SubSIMD( s.bounds_min_naive[0], zNearX4 ), SubSIMD( s.bounds_min_naive[1], zNearX4 ), SubSIMD( s.bounds_min_naive[2], zNearX4 ) }; fltx4 adjustedMaxs[3] = { AddSIMD( s.bounds_max_naive[0], zNearX4 ), AddSIMD( s.bounds_max_naive[1], zNearX4 ), AddSIMD( s.bounds_max_naive[2], zNearX4 ) }; fltx4 needsFullscreen = IsPointInBoundsX4( m_vecViewOrigin, adjustedMins, adjustedMaxs ); //Jack: this is terrible I know for( int i = 0; i < s.count; i++ ) { if( s.lights[i]->IsSpot() ) { if( _isnan( SubFloat( needsFullscreen, i ) ) ) { if( !s.lights[i]->spotFrustum.CullBox( camMins, camMaxs ) ) { m_hPreSortedLights[LSORT_SPOT_FULLSCREEN].AddToTail( s.lights[i] ); } else { m_hPreSortedLights[LSORT_SPOT_WORLD].AddToTail( s.lights[i] ); } } else { m_hPreSortedLights[LSORT_SPOT_WORLD].AddToTail( s.lights[i] ); } } else { if( _isnan( SubFloat( needsFullscreen, i ) ) ) { m_hPreSortedLights[LSORT_POINT_FULLSCREEN].AddToTail( s.lights[i] ); } else { m_hPreSortedLights[LSORT_POINT_WORLD].AddToTail( s.lights[i] ); } } } fltx4 volume = AndSIMD( s.hasVolumetrics, s.hasVolumetrics ); m_bDrawVolumetrics = m_bDrawVolumetrics || !IsAllZeros( volume ); } #else FOR_EACH_VEC_FAST( def_light_t*, m_hRenderLights, l ) { bool bNeedsFullscreen = IsPointInBounds( m_vecViewOrigin, l->bounds_min_naive - vecBloat, l->bounds_max_naive + vecBloat ); if ( bNeedsFullscreen && l->IsSpot() ) { bNeedsFullscreen = !l->spotFrustum.CullBox( camMins, camMaxs ); } const bool bVolume = ( l->HasShadow() && 
l->HasVolumetrics() ); m_bDrawVolumetrics = m_bDrawVolumetrics || bVolume; Assert( l->iLighttype * 2 + 1 < LSORT_COUNT ); m_hPreSortedLights[ l->iLighttype * 2 + (int)bNeedsFullscreen ].AddToTail( l ); }
void LightDesc_t::ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal, FourVectors &color, bool DoHalfLambert ) const { FourVectors delta; Assert((m_Type==MATERIAL_LIGHT_POINT) || (m_Type==MATERIAL_LIGHT_SPOT) || (m_Type==MATERIAL_LIGHT_DIRECTIONAL)); switch (m_Type) { case MATERIAL_LIGHT_POINT: case MATERIAL_LIGHT_SPOT: delta.DuplicateVector(m_Position); delta-=pos; break; case MATERIAL_LIGHT_DIRECTIONAL: ComputeLightAtPointsForDirectional( pos, normal, color, DoHalfLambert ); return; default: return; } fltx4 dist2 = delta*delta; dist2=MaxSIMD( Four_Ones, dist2 ); fltx4 falloff; if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 ) { falloff = ReplicateX4(m_Attenuation0); } else falloff= Four_Epsilons; if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 ) { falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation1),SqrtEstSIMD(dist2))); } if( m_Flags & LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 ) { falloff=AddSIMD(falloff,MulSIMD(ReplicateX4(m_Attenuation2),dist2)); } falloff=ReciprocalEstSIMD(falloff); // Cull out light beyond this radius // now, zero out elements for which dist2 was > range^2. !!speed!! lights should store dist^2 in sse format if (m_Range != 0.f) { fltx4 RangeSquared=ReplicateX4(m_RangeSquared); // !!speed!! falloff=AndSIMD(falloff,CmpLtSIMD(dist2,RangeSquared)); } delta.VectorNormalizeFast(); fltx4 strength=delta*normal; if (DoHalfLambert) { strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives); } else strength=MaxSIMD(Four_Zeros,delta*normal); switch(m_Type) { case MATERIAL_LIGHT_POINT: // half-lambert break; case MATERIAL_LIGHT_SPOT: { fltx4 dot2=SubSIMD(Four_Zeros,delta*m_Direction); // dot position with spot light dir for cone falloff fltx4 cone_falloff_scale=MulSIMD(ReplicateX4(m_OneOverThetaDotMinusPhiDot), SubSIMD(dot2,ReplicateX4(m_PhiDot))); cone_falloff_scale=MinSIMD(cone_falloff_scale,Four_Ones); if ((m_Falloff!=0.0) && (m_Falloff!=1.0)) { // !!speed!! 
could compute integer exponent needed by powsimd and store in light cone_falloff_scale=PowSIMD(cone_falloff_scale,m_Falloff); } strength=MulSIMD(cone_falloff_scale,strength); // now, zero out lighting where dot2<phidot. This will mask out any invalid results // from pow function, etc fltx4 OutsideMask=CmpGtSIMD(dot2,ReplicateX4(m_PhiDot)); // outside light cone? strength=AndSIMD(OutsideMask,strength); } break; default: break; } strength=MulSIMD(strength,falloff); color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(m_Color.x))); color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(m_Color.y))); color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(m_Color.z))); }
void RayTracingEnvironment::Trace4Rays(const FourRays &rays, fltx4 TMin, fltx4 TMax, int DirectionSignMask, RayTracingResult *rslt_out, int32 skip_id, ITransparentTriangleCallback *pCallback) { rays.Check(); memset(rslt_out->HitIds,0xff,sizeof(rslt_out->HitIds)); rslt_out->HitDistance=ReplicateX4(1.0e23); rslt_out->surface_normal.DuplicateVector(Vector(0.,0.,0.)); FourVectors OneOverRayDir=rays.direction; OneOverRayDir.MakeReciprocalSaturate(); // now, clip rays against bounding box for(int c=0;c<3;c++) { fltx4 isect_min_t= MulSIMD(SubSIMD(ReplicateX4(m_MinBound[c]),rays.origin[c]),OneOverRayDir[c]); fltx4 isect_max_t= MulSIMD(SubSIMD(ReplicateX4(m_MaxBound[c]),rays.origin[c]),OneOverRayDir[c]); TMin=MaxSIMD(TMin,MinSIMD(isect_min_t,isect_max_t)); TMax=MinSIMD(TMax,MaxSIMD(isect_min_t,isect_max_t)); } fltx4 active=CmpLeSIMD(TMin,TMax); // mask of which rays are active if (! IsAnyNegative(active) ) return; // missed bounding box int32 mailboxids[MAILBOX_HASH_SIZE]; // used to avoid redundant triangle tests memset(mailboxids,0xff,sizeof(mailboxids)); // !!speed!! keep around? 
int front_idx[3],back_idx[3]; // based on ray direction, whether to // visit left or right node first if (DirectionSignMask & 1) { back_idx[0]=0; front_idx[0]=1; } else { back_idx[0]=1; front_idx[0]=0; } if (DirectionSignMask & 2) { back_idx[1]=0; front_idx[1]=1; } else { back_idx[1]=1; front_idx[1]=0; } if (DirectionSignMask & 4) { back_idx[2]=0; front_idx[2]=1; } else { back_idx[2]=1; front_idx[2]=0; } NodeToVisit NodeQueue[MAX_NODE_STACK_LEN]; CacheOptimizedKDNode const *CurNode=&(OptimizedKDTree[0]); NodeToVisit *stack_ptr=&NodeQueue[MAX_NODE_STACK_LEN]; while(1) { while (CurNode->NodeType() != KDNODE_STATE_LEAF) // traverse until next leaf { int split_plane_number=CurNode->NodeType(); CacheOptimizedKDNode const *FrontChild=&(OptimizedKDTree[CurNode->LeftChild()]); fltx4 dist_to_sep_plane= // dist=(split-org)/dir MulSIMD( SubSIMD(ReplicateX4(CurNode->SplittingPlaneValue), rays.origin[split_plane_number]),OneOverRayDir[split_plane_number]); fltx4 active=CmpLeSIMD(TMin,TMax); // mask of which rays are active // now, decide how to traverse children. can either do front,back, or do front and push // back. fltx4 hits_front=AndSIMD(active,CmpGeSIMD(dist_to_sep_plane,TMin)); if (! IsAnyNegative(hits_front)) { // missed the front. only traverse back //printf("only visit back %d\n",CurNode->LeftChild()+back_idx[split_plane_number]); CurNode=FrontChild+back_idx[split_plane_number]; TMin=MaxSIMD(TMin, dist_to_sep_plane); } else { fltx4 hits_back=AndSIMD(active,CmpLeSIMD(dist_to_sep_plane,TMax)); if (! IsAnyNegative(hits_back) ) { // missed the back - only need to traverse front node //printf("only visit front %d\n",CurNode->LeftChild()+front_idx[split_plane_number]); CurNode=FrontChild+front_idx[split_plane_number]; TMax=MinSIMD(TMax, dist_to_sep_plane); } else { // at least some rays hit both nodes. 
// must push far, traverse near //printf("visit %d,%d\n",CurNode->LeftChild()+front_idx[split_plane_number], // CurNode->LeftChild()+back_idx[split_plane_number]); assert(stack_ptr>NodeQueue); --stack_ptr; stack_ptr->node=FrontChild+back_idx[split_plane_number]; stack_ptr->TMin=MaxSIMD(TMin,dist_to_sep_plane); stack_ptr->TMax=TMax; CurNode=FrontChild+front_idx[split_plane_number]; TMax=MinSIMD(TMax,dist_to_sep_plane); } } } // hit a leaf! must do intersection check int ntris=CurNode->NumberOfTrianglesInLeaf(); if (ntris) { int32 const *tlist=&(TriangleIndexList[CurNode->TriangleIndexStart()]); do { int tnum=*(tlist++); //printf("try tri %d\n",tnum); // check mailbox int mbox_slot=tnum & (MAILBOX_HASH_SIZE-1); TriIntersectData_t const *tri = &( OptimizedTriangleList[tnum].m_Data.m_IntersectData ); if ( ( mailboxids[mbox_slot] != tnum ) && ( tri->m_nTriangleID != skip_id ) ) { n_intersection_calculations++; mailboxids[mbox_slot] = tnum; // compute plane intersection FourVectors N; N.x = ReplicateX4( tri->m_flNx ); N.y = ReplicateX4( tri->m_flNy ); N.z = ReplicateX4( tri->m_flNz ); fltx4 DDotN = rays.direction * N; // mask off zero or near zero (ray parallel to surface) fltx4 did_hit = OrSIMD( CmpGtSIMD( DDotN,FourEpsilons ), CmpLtSIMD( DDotN, FourNegativeEpsilons ) ); fltx4 numerator=SubSIMD( ReplicateX4( tri->m_flD ), rays.origin * N ); fltx4 isect_t=DivSIMD( numerator,DDotN ); // now, we have the distance to the plane. lets update our mask did_hit = AndSIMD( did_hit, CmpGtSIMD( isect_t, FourZeros ) ); //did_hit=AndSIMD(did_hit,CmpLtSIMD(isect_t,TMax)); did_hit = AndSIMD( did_hit, CmpLtSIMD( isect_t, rslt_out->HitDistance ) ); if ( ! 
IsAnyNegative( did_hit ) ) continue; // now, check 3 edges fltx4 hitc1 = AddSIMD( rays.origin[tri->m_nCoordSelect0], MulSIMD( isect_t, rays.direction[ tri->m_nCoordSelect0] ) ); fltx4 hitc2 = AddSIMD( rays.origin[tri->m_nCoordSelect1], MulSIMD( isect_t, rays.direction[tri->m_nCoordSelect1] ) ); // do barycentric coordinate check fltx4 B0 = MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[0] ), hitc1 ); B0 = AddSIMD( B0, MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[1] ), hitc2 ) ); B0 = AddSIMD( B0, ReplicateX4( tri->m_ProjectedEdgeEquations[2] ) ); did_hit = AndSIMD( did_hit, CmpGeSIMD( B0, FourZeros ) ); fltx4 B1 = MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[3] ), hitc1 ); B1 = AddSIMD( B1, MulSIMD( ReplicateX4( tri->m_ProjectedEdgeEquations[4]), hitc2 ) ); B1 = AddSIMD( B1, ReplicateX4( tri->m_ProjectedEdgeEquations[5] ) ); did_hit = AndSIMD( did_hit, CmpGeSIMD( B1, FourZeros ) ); fltx4 B2 = AddSIMD( B1, B0 ); did_hit = AndSIMD( did_hit, CmpLeSIMD( B2, Four_Ones ) ); if ( ! IsAnyNegative( did_hit ) ) continue; // if the triangle is transparent if ( tri->m_nFlags & FCACHETRI_TRANSPARENT ) { if ( pCallback ) { // assuming a triangle indexed as v0, v1, v2 // the projected edge equations are set up such that the vert opposite the first // equation is v2, and the vert opposite the second equation is v0 // Therefore we pass them back in 1, 2, 0 order // Also B2 is currently B1 + B0 and needs to be 1 - (B1+B0) in order to be a real // barycentric coordinate. 
Compute that now and pass it to the callback fltx4 b2 = SubSIMD( Four_Ones, B2 ); if ( pCallback->VisitTriangle_ShouldContinue( *tri, rays, &did_hit, &B1, &b2, &B0, tnum ) ) { did_hit = Four_Zeros; } } } // now, set the hit_id and closest_hit fields for any enabled rays fltx4 replicated_n = ReplicateIX4(tnum); StoreAlignedSIMD((float *) rslt_out->HitIds, OrSIMD(AndSIMD(replicated_n,did_hit), AndNotSIMD(did_hit,LoadAlignedSIMD( (float *) rslt_out->HitIds)))); rslt_out->HitDistance=OrSIMD(AndSIMD(isect_t,did_hit), AndNotSIMD(did_hit,rslt_out->HitDistance)); rslt_out->surface_normal.x=OrSIMD( AndSIMD(N.x,did_hit), AndNotSIMD(did_hit,rslt_out->surface_normal.x)); rslt_out->surface_normal.y=OrSIMD( AndSIMD(N.y,did_hit), AndNotSIMD(did_hit,rslt_out->surface_normal.y)); rslt_out->surface_normal.z=OrSIMD( AndSIMD(N.z,did_hit), AndNotSIMD(did_hit,rslt_out->surface_normal.z)); } } while (--ntris); // now, check if all rays have terminated fltx4 raydone=CmpLeSIMD(TMax,rslt_out->HitDistance); if (! IsAnyNegative(raydone)) { return; } } if (stack_ptr==&NodeQueue[MAX_NODE_STACK_LEN]) { return; } // pop stack! CurNode=stack_ptr->node; TMin=stack_ptr->TMin; TMax=stack_ptr->TMax; stack_ptr++; } }