void neuralNet::activationPrime_sse(const float* neuronOutput, float* result) { static const __m128 ones = _mm_set1_ps(1.0f); static const __m128 sigCoefficients = _mm_set1_ps(SIGMOIDCOEFFICIENT); __m128 temp; const __m128* vOutput = (__m128*)neuronOutput; // 1 - ans temp = _mm_sub_ps(ones, *vOutput); // (1-ans) * ans temp = _mm_mul_ps(temp, *vOutput); // ans * coefficient temp = _mm_mul_ps(temp, sigCoefficients); #ifndef NDEBUG const float* _temp = (float*)&temp; assert(fastabs(_temp[0] - activationPrime(neuronOutput[0])) < 0.05f); assert(fastabs(_temp[1] - activationPrime(neuronOutput[1])) < 0.05f); assert(fastabs(_temp[2] - activationPrime(neuronOutput[2])) < 0.05f); assert(fastabs(_temp[3] - activationPrime(neuronOutput[3])) < 0.05f); #endif // return ans _mm_store_ps(result, temp); };
void experienceNet::normalPDF_sse(float* result, const float* _partitions, float _mean, float _stdDev) { /* CODE ADAPTED FROM boost/math/normal.hpp RealType exponent = x - mean; exponent *= -exponent; exponent /= 2 * sd * sd; result = exp(exponent); result /= sd * sqrt(2 * constants::pi<RealType>()); return result; */ const __m128& partitions = *(__m128*)_partitions; __m128 exponent, tmp, mean, sd; /* CODE ADAPTED FROM http://fastcpp.blogspot.com/2011/03/changing-sign-of-float-values-using-sse.html */ static const __m128 signmask = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); static const __m128 twos = _mm_set_ps1(2.0f); static const __m128 sqrt_pi_2_s = _mm_set_ps1(sqrt(2.0 * M_PI)); // store mean and sd: mean = _mm_load_ps1(&_mean); sd = _mm_load_ps1(&_stdDev); // exponent = x - mean exponent = _mm_sub_ps(partitions, mean); // exponent *= -exponent; tmp = _mm_xor_ps(exponent, signmask); exponent = _mm_mul_ps(exponent, tmp); // exponent /= 2 * sd * sd; tmp = _mm_mul_ps(sd, sd); tmp = _mm_mul_ps(tmp, twos); exponent = _mm_div_ps(exponent, tmp); // exponent = exp(exponent); exponent = _mm_exp_ps(exponent); // exponent /= sd * sqrt(2 * pi) tmp = _mm_mul_ps(sd, sqrt_pi_2_s); tmp = _mm_div_ps(exponent, tmp); #ifndef NDEBUG const float* _result = (float*)&tmp; boost::math::normal_distribution<float> cNormal(_mean, _stdDev); assert(fastabs(_result[0] - boost::math::pdf(cNormal, _partitions[0])) < 0.001f); assert(fastabs(_result[1] - boost::math::pdf(cNormal, _partitions[1])) < 0.001f); assert(fastabs(_result[2] - boost::math::pdf(cNormal, _partitions[2])) < 0.001f); assert(fastabs(_result[3] - boost::math::pdf(cNormal, _partitions[3])) < 0.001f); #endif // return result: _mm_store_ps(result, tmp); };
/**
 * Finds the single point shared by three planes, if there is one.
 *
 * The three normals form the rows of a 3x3 matrix; the code works on its
 * columns and solves the system with scalar triple products (Cramer's rule).
 * NOTE(review): assumes TSRVector3::Cross( a, b ) stores a x b in the object
 * and that a plane satisfies dot( n, p ) == d -- confirm against TSRPlane.
 *
 * @param pPlaneA first plane  (reads n.x, n.y, n.z and d)
 * @param pPlaneB second plane (reads n.x, n.y, n.z and d)
 * @param pPlaneC third plane  (reads n.x, n.y, n.z and d)
 * @param result  written only when 1 is returned
 * @return 1 when a point was written to result, 0 when the magnitude of the
 *         determinant is below 0.001 and no point is produced
 */
int IntersectPlanes( TSRPlane* pPlaneA, TSRPlane* pPlaneB, TSRPlane* pPlaneC, TSRVector3& result )
{
    // columns of the matrix whose rows are the three plane normals
    TSRVector3 columnX( pPlaneA->n.x, pPlaneB->n.x, pPlaneC->n.x );
    TSRVector3 columnY( pPlaneA->n.y, pPlaneB->n.y, pPlaneC->n.y );
    TSRVector3 columnZ( pPlaneA->n.z, pPlaneB->n.z, pPlaneC->n.z );

    TSRVector3 crossYZ;
    crossYZ.Cross( columnY, columnZ );

    const float determinant = columnX.Dot( crossYZ );
    if ( fastabs( determinant ) < 0.001f )
    {
        return 0;
    }

    TSRVector3 distances( pPlaneA->d, pPlaneB->d, pPlaneC->d );

    TSRVector3 crossXD;
    crossXD.Cross( columnX, distances );

    const float invDeterminant = 1.0f / determinant;

    result.x = distances.Dot( crossYZ ) * invDeterminant;
    result.y = columnZ.Dot( crossXD ) * invDeterminant;
    result.z = -columnY.Dot( crossXD ) * invDeterminant;

    return 1;
}
// Amplitude compensation: derive x->compensationfactor from the magnitude of
// x->speed and x->compensationsetting. The factor is
// compensationsetting * (1 - |speed|) + 1 and is never allowed below 0.7.
static inline void sliceplay_compensationfactor(t_sliceplay *x)
{
    const t_float absspeed = fastabs(x->speed);
    t_float factor = x->compensationsetting * (1. - absspeed) + 1.;

    if(factor < 0.7)
    {
        factor = 0.7;   // lower bound
    }

    x->compensationfactor = factor;
}
// Set new cue points for the next slice.
//
// startindex / stopindex are the slice boundaries; a negative value in either
// makes the call a no-op. Whether the new cue points are accepted depends on
// x->interrupt (-1, 0 or 1) and on whether a slice is currently playing
// (x->playtimer != 0). When a playing slice is interrupted, its settings are
// handed over to the fade-out state first. On acceptance the start position,
// play timer, slice speed and compensation factor are updated and the slice
// length in samples is reported through sliceplay_tick().
//
// A speed that is neither positive nor negative (zero or NaN) is ignored:
// it has no playback direction, and samples / speed would be non-finite,
// which cannot be converted to t_int with defined behaviour.
static void sliceplay_cuepoints(t_sliceplay *x, t_floatarg startindex, t_floatarg stopindex)
{
    if((startindex < 0.) || (stopindex < 0.)) return;   // negative cuepoints are ignored

    const int forward = (x->speed > 0.);
    const int reverse = (x->speed < 0.);
    if(!forward && !reverse) return;                    // no direction: nothing to play

    startindex -= 3.;                                   // start earlier for interpolation
    stopindex -= 3.;

    const t_int interrupt = x->interrupt;               // x->interrupt can be -1, 0 or 1
    t_int accept = 0;

    if(interrupt == 1) accept = 1;                      // interrupt = 1: always interrupt
    else if(!x->playtimer) accept = 1;                  // if not playing, always accept new cuepoints
    else if((interrupt == -1) && (x->slicespeed < 0)) accept = 1;   // interrupt = -1: only interrupt reversed playback

    if(!accept) return;

    if(x->playtimer)                                    // if interrupted, fade out previous slice
    {
        if(x->playtimer < COSTABSIZE>>1) x->fadeouttimer = x->playtimer;
        else x->fadeouttimer = COSTABSIZE>>1;

        x->fadeoutcounter = 0.;
        x->fadeoutpoint = x->currentindex + x->loopsize;    // hand over previous settings
        x->fadeoutspeed = x->slicespeed;
        x->fadeoutcompensation = x->compensationfactor;
    }

    if(forward) x->startindex = (t_int)startindex + x->loopsize;        // forward
    else x->startindex = (t_int)stopindex + x->loopsize - 1.;           // reverse

    x->counter = 0.;

    const t_int start = (t_int)startindex;
    const t_int stop = (t_int)stopindex;
    const t_int samples = ((stop - start + x->loopsize) & (x->loopmask)) - 1;

    x->playtimer = (t_int)(samples / x->speed);         // compute playback time in number of samples
    x->playtimer = fastabs(x->playtimer);
    x->slicespeed = x->speed;

    sliceplay_compensationfactor(x);
    sliceplay_tick(x, fastabs(x->playtimer));           // report slice length in nr of samples
}
void neuralNet::activation_approx_sse(const float* _neuronOutput, float* result) { BOOST_STATIC_ASSERT(SIGMOIDCOEFFICIENT == 4.0f); // code adapted from http://ybeernet.blogspot.com/2011/03/speeding-up-sigmoid-function-by.html // approximates sigmoid function with coefficient 4.0f static const __m128 ones = _mm_set1_ps(1.0f); static const __m128 oneFourths = _mm_set1_ps(0.25f); static const __m128 fours = _mm_set1_ps(4.0f); __m128 temp; const __m128* vOutput = (__m128*)_neuronOutput; // min (output, 4.0) temp = _mm_min_ps(*vOutput, fours); // multiply by 0.25 temp = _mm_mul_ps(temp, oneFourths); // 1 - ans temp = _mm_sub_ps(ones, temp); // ans^16 temp = _mm_mul_ps(temp, temp); temp = _mm_mul_ps(temp, temp); temp = _mm_mul_ps(temp, temp); temp = _mm_mul_ps(temp, temp); // 1 + ans temp = _mm_add_ps(ones, temp); // 1 / ans temp = _mm_rcp_ps(temp); #ifndef NDEBUG const float* _temp = (float*)&temp; assert(fastabs(_temp[0] - activation(_neuronOutput[0])) < 0.05f); assert(fastabs(_temp[1] - activation(_neuronOutput[1])) < 0.05f); assert(fastabs(_temp[2] - activation(_neuronOutput[2])) < 0.05f); assert(fastabs(_temp[3] - activation(_neuronOutput[3])) < 0.05f); #endif // return ans _mm_store_ps(result, temp); };
void neuralNet::activation_approx(const float* _neuronOutput, float* result) { BOOST_STATIC_ASSERT(SIGMOIDCOEFFICIENT == 4.0f); // code from http://ybeernet.blogspot.com/2011/03/speeding-up-sigmoid-function-by.html // approximates sigmoid function with coefficient 4.0f float tmp = std::min(*_neuronOutput, 4.0f); tmp = 1.0f - 0.25f * tmp; tmp *= tmp; tmp *= tmp; tmp *= tmp; tmp *= tmp; tmp = 1.0f / (1.0f + tmp); assert(fastabs(tmp - activation(*_neuronOutput)) < 0.05f); // return ans *result = tmp; };
void cBSPDemoCallbacks::OnEvent( TSREvent& _event ) { switch ( _event.type ) { case TWISTER_KEYDOWN: #ifdef ENABLE_EDITING if ( KEYDOWN( TWISTER_KEY_p ) ) { Painting* pNewPainting = new Painting(); char paintingName[ 128 ]; sprintf( paintingName, "database//objects//painting%d.txt", g_pPaintings->m_Paintings.size() ); pNewPainting->SetName( paintingName ); FormulateViewMatrix( g_pCamera, pNewPainting->m_Transform ); pNewPainting->Save(); } if ( KEYDOWN( TWISTER_KEY_1 ) ) { if ( g_pCurrentPainting ) { TSRMatrix4& transform = g_pCurrentPainting->m_Transform; TSRMatrix4 rot; rot.MakeIdent(); rot.IsAxisRotation( TSRVector3( 1.0f, 0.0f, 0.0f ), PI / 2.0f ); transform = rot * transform ; g_pCurrentPainting->Save(); } } if ( KEYDOWN( TWISTER_KEY_2 ) ) { if ( g_pCurrentPainting ) { TSRMatrix4& transform = g_pCurrentPainting->m_Transform; TSRMatrix4 rot; rot.MakeIdent(); rot.IsAxisRotation( TSRVector3( 0.0f, 1.0f, 0.0f ), PI / 2.0f ); transform = rot * transform ; g_pCurrentPainting->Save(); } } if ( KEYDOWN( TWISTER_KEY_3 ) ) { if ( g_pCurrentPainting ) { TSRMatrix4& transform = g_pCurrentPainting->m_Transform; TSRMatrix4 rot; rot.MakeIdent(); rot.IsAxisRotation( TSRVector3( 0.0f, 0.0f, 1.0f ), PI / 2.0f ); transform = rot * transform ; g_pCurrentPainting->Save(); } } if ( KEYDOWN( TWISTER_KEY_n ) ) { g_PaintingsIndex++; if ( g_PaintingsIndex == g_pPaintings->m_Paintings.size() ) { g_PaintingsIndex = 0; } g_pCurrentPainting = g_pPaintings->m_Paintings[g_PaintingsIndex]; } if ( KEYDOWN( TWISTER_KEY_v ) ) { g_bDebugRenderLights = !g_bDebugRenderLights; break; } #endif // ENABLE_EDITING if ( KEYDOWN( TWISTER_KEY_l ) ) { TSRColor4 LightColor; LightColor.r = fastabs( m_pWorld->m_Camera.m_Fwd.x ); LightColor.g = fastabs( m_pWorld->m_Camera.m_Fwd.y ); LightColor.b = fastabs( m_pWorld->m_Camera.m_Fwd.z ); LightColor.a = 4.0f; LightsManager()->AddPointLight( m_pWorld->m_Camera.m_Loc, LightColor, 100.0f ); } if ( KEYDOWN( TWISTER_KEY_k ) ) { if ( 
LightsManager()->m_SceneLightsContext.GetPointLightsCount() > 0 ) { SAFE_DELETE( LightsManager()->m_SceneLightsContext.m_PointLights.back() ); LightsManager()->m_SceneLightsContext.m_PointLights.pop_back(); } } break; } }
// Set the minimum playback speed. The sign of the argument is discarded:
// x->minspeed always receives the absolute value.
static void sliceplay_minspeed(t_sliceplay *x, t_floatarg minspeed)
{
    const t_floatarg magnitude = fastabs(minspeed);

    x->minspeed = magnitude;
}