float ComputeGravitation_SIMD_openmp( float *force[3], float *pos[4], float *mass, float softeningSquared, size_t N ) { chTimerTimestamp start, end; chTimerGetTime( &start ); #pragma omp parallel for for (int i = 0; i < N; i++) { v4sf ax = vec_zero; v4sf ay = vec_zero; v4sf az = vec_zero; v4sf *px = (v4sf *) pos[0]; v4sf *py = (v4sf *) pos[1]; v4sf *pz = (v4sf *) pos[2]; v4sf *pmass = (v4sf *) mass; v4sf x0 = _vec_set_ps1( pos[0][i] ); v4sf y0 = _vec_set_ps1( pos[1][i] ); v4sf z0 = _vec_set_ps1( pos[2][i] ); for ( int j = 0; j < N/4; j++ ) { bodyBodyInteraction( ax, ay, az, x0, y0, z0, px[j], py[j], pz[j], pmass[j], _vec_set_ps1( softeningSquared ) ); } // Accumulate sum of four floats in the AltiVec register force[0][i] = _vec_sum( ax ); force[1][i] = _vec_sum( ay ); force[2][i] = _vec_sum( az ); } chTimerGetTime( &end ); return (float) chTimerElapsedTime( &start, &end ) * 1000.0f; }
// Recomputes m_force for every body: the first three entries of each body's
// 4-float slot are reset and then receive the sum of the bodyBodyInteraction
// results against every body in the m_pos[m_currentRead] buffer (the body
// itself included). The fourth entry of each slot is left untouched.
void BodySystemCPU::_computeNBodyGravitation()
{
    for (int target = 0; target < m_numBodies; ++target)
    {
        // Offset of this body's slot (stride of 4 floats per body)
        const int base = target * 4;

        for (int axis = 0; axis < 3; ++axis)
        {
            m_force[base + axis] = 0;
        }

        for (int source = 0; source < m_numBodies; ++source)
        {
            // Fresh zeroed scratch for each pair before handing it to the helper
            float contribution[3] = {0, 0, 0};

            bodyBodyInteraction(contribution,
                                &m_pos[m_currentRead][source * 4],
                                &m_pos[m_currentRead][base],
                                m_softeningSquared);

            m_force[base]     += contribution[0];
            m_force[base + 1] += contribution[1];
            m_force[base + 2] += contribution[2];
        }
    }
}
//////////////////////////////////////////////////////////////////////////////// //! Compute reference data set //! Each element is multiplied with the number of threads / array length //! @param reference reference data, computed but preallocated //! @param idata input data as provided to device //! @param len number of elements in reference / idata //////////////////////////////////////////////////////////////////////////////// void computeGold( float* force, float* pos, const unsigned int numBodies, float softeningSquared) { for(unsigned int i = 0; i < numBodies; ++i) { force[i*4 ] = 0; force[i*4+1] = 0; force[i*4+2] = 0; force[i*4+3] = 0; } for(unsigned int i = 0; i < numBodies; ++i) { for(unsigned int j = 0; j < numBodies; ++j) { float f[4]; bodyBodyInteraction(f, &pos[j*4], &pos[i*4], softeningSquared); for (int k = 0; k < 3; ++k) { force[i*4+k] += f[k]; } } } }