Example #1
0
    /// Shades one fragment: looks the sample up in the shadow buffer, evaluates
    /// the ambient, diffuse and specular terms and writes the result to `color`.
    /// @param bar    barycentric weights used to interpolate the varyings
    /// @param color  receives the shaded colour
    /// @return always false
    virtual bool fragment(Vec3f bar, TGAColor &color) {
        //B3_PROFILE("fragment");

        // Interpolated light-view position, mapped through the viewport matrix.
        Vec4f lightPos = m_viewportMat*(varying_tri_light_view*bar);
        const float depth = lightPos[2]; // read before the divide by w below
        lightPos = lightPos/lightPos[3];

        // Clamp to [0, m_width-1] x [0, m_height-1] and flatten to an index
        // into the shadow buffer array.
        const float clampedX = b3Max(float(0.0), b3Min(float(m_width-1), lightPos[0]));
        const float clampedY = b3Max(float(0.0), b3Min(float(m_height-1), lightPos[1]));
        const int shadowIdx = int(clampedX) + int(clampedY)*m_width;

        // 1.0 when the stored value is below -depth+0.05, otherwise 0.8.
        // The 0.05 offset is the magic coefficient that avoids z-fighting.
        float shadow = 0.8+0.2*(m_shadowBuffer->at(shadowIdx)<-depth+0.05);

        Vec3f bn = (varying_nrm*bar).normalize();
        Vec2f uv = varying_uv*bar;

        // Reflect the light direction about the interpolated normal.
        const float nDotL = bn * m_light_dir_local;
        Vec3f reflection_direction = (bn * (nDotL * 2.f) - m_light_dir_local).normalize();
        const float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv));
        const float diffuse = b3Max(0.f, nDotL);

        // Texture colour tinted by the per-object RGBA colour.
        color = m_model->diffuse(uv);
        for (int c = 0; c < 4; ++c)
        {
            color[c] *= m_colorRGBA[c];
        }

        // Light the three colour channels; each result is capped at 255.
        const auto litScale = shadow*(m_diffuse_coefficient*diffuse+m_specular_coefficient*specular);
        for (int c = 0; c < 3; ++c)
        {
            color[c] = b3Min(int(m_ambient_coefficient*color[c] + litScale*color[c]*m_light_color[c]), 255);
        }

        return false;

    }
Example #2
0
// Query callback: records the pair formed by `proxyId` and m_queryProxyId in
// the pair buffer, doubling the buffer first when it is full.
// Always returns true so the query keeps looking for overlapping pairs.
bool b3BroadPhase::QueryCallback(i32 proxyId) {
	// A proxy can't overlap with itself: nothing to record in that case.
	if (proxyId != m_queryProxyId) {
		if (m_pairBufferCount == m_pairBufferCapacity) {
			// Buffer is full: allocate twice the capacity, move the existing
			// pairs over and release the old storage.
			b3Pair* const fullBuffer = m_pairBuffer;
			m_pairBufferCapacity *= 2;
			m_pairBuffer = (b3Pair*)::b3Alloc(m_pairBufferCapacity * sizeof(b3Pair));
			::memcpy(m_pairBuffer, fullBuffer, m_pairBufferCount * sizeof(b3Pair));
			::b3Free(fullBuffer);
		}

		// Store the pair with the smaller proxy id first.
		b3Pair* const slot = m_pairBuffer + m_pairBufferCount;
		slot->proxy1 = b3Min(proxyId, m_queryProxyId);
		slot->proxy2 = b3Max(proxyId, m_queryProxyId);
		++m_pairBufferCount;
	}

	return true;
}
Example #3
0
// Records the pair (proxyId, m_queryProxyId) in m_pairs, growing the array
// when it is full. Always returns true so the caller keeps reporting overlaps.
bool b3BroadPhase::Report(u32 proxyId) 
{
	const bool isSelf = (proxyId == m_queryProxyId);
	if (isSelf) 
	{
		// A proxy never pairs with itself.
		return true;
	}

	const bool isFull = (m_pairCount == m_pairCapacity);
	if (isFull) 
	{
		// Double the capacity and migrate the pairs stored so far.
		m_pairCapacity *= 2;

		b3Pair* const previous = m_pairs;
		m_pairs = (b3Pair*)b3Alloc(m_pairCapacity * sizeof(b3Pair));
		memcpy(m_pairs, previous, m_pairCount * sizeof(b3Pair));
		b3Free(previous);
	}

	// Append the pair, ordered so that proxy1 <= proxy2.
	b3Pair* const entry = &m_pairs[m_pairCount];
	entry->proxy1 = b3Min(proxyId, m_queryProxyId);
	entry->proxy2 = b3Max(proxyId, m_queryProxyId);
	++m_pairCount;

	return true;
}
Example #4
0
    /// Shades one fragment with fixed ambient/diffuse/specular weights and
    /// tints it by m_colorRGBA and m_light_color.
    /// @param bar    barycentric weights used to interpolate the varyings
    /// @param color  receives the shaded colour
    /// @return always false
    virtual bool fragment(Vec3f bar, TGAColor &color) {
        Vec3f bn = (varying_nrm*bar).normalize();
        Vec2f uv = varying_uv*bar;

        // Reflect the light direction about the interpolated normal.
        const float nDotL = bn * m_light_dir_local;
        Vec3f reflection_direction = (bn * (nDotL * 2.f) - m_light_dir_local).normalize();
        const float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv));
        const float diffuse = b3Max(0.f, nDotL);

        // Hard-coded lighting weights.
        const float ambient_coefficient = 0.6;
        const float diffuse_coefficient = 0.35;
        const float specular_coefficient = 0.05;

        // The diffuse + specular contribution is capped so intensity <= 1.
        const float lit = b3Min(diffuse * diffuse_coefficient + specular * specular_coefficient, 1.0f - ambient_coefficient);
        const float intensity = ambient_coefficient + lit;

        color = m_model->diffuse(uv) * intensity;

        //warning: bgra color is swapped to rgba to upload texture
        // The first three channels get the object colour, then the light colour.
        for (int c = 0; c < 3; ++c)
        {
            color.bgra[c] *= m_colorRGBA[c];
            color.bgra[c] *= m_light_color[c];
        }
        // The fourth channel is only scaled by the object colour.
        color.bgra[3] *= m_colorRGBA[3];

        return false;
    }
Example #5
0
    /// Shades one fragment using a normal fetched from the model's normal map
    /// and re-expressed in a per-fragment basis built from the triangle edges,
    /// the UV deltas and the interpolated normal.
    /// @param bar    barycentric weights used to interpolate the varyings
    /// @param color  receives the diffuse texel scaled by the light term
    /// @return always false
    virtual bool fragment(Vec3f bar, TGAColor &color) {
        Vec3f bn = (varying_nrm*bar).normalize();
        Vec2f uv = varying_uv*bar;

        // Rows: the two triangle edges (from ndc_tri) and the interpolated normal.
        mat<3,3,float> edgeFrame;
        edgeFrame[0] = ndc_tri.col(1) - ndc_tri.col(0);
        edgeFrame[1] = ndc_tri.col(2) - ndc_tri.col(0);
        edgeFrame[2] = bn;

        mat<3,3,float> edgeFrameInv = edgeFrame.invert();

        // Solve for the two axes that correspond to the u and v deltas of the triangle.
        Vec3f uAxis = edgeFrameInv * Vec3f(varying_uv[0][1] - varying_uv[0][0], varying_uv[0][2] - varying_uv[0][0], 0);
        Vec3f vAxis = edgeFrameInv * Vec3f(varying_uv[1][1] - varying_uv[1][0], varying_uv[1][2] - varying_uv[1][0], 0);

        // Basis whose columns are the normalized axes and the normal.
        mat<3,3,float> basis;
        basis.set_col(0, uAxis.normalize());
        basis.set_col(1, vAxis.normalize());
        basis.set_col(2, bn);

        Vec3f n = (basis*m_model->normal(uv)).normalize();

        // Light term offset by 0.3 and clamped to [0, 1].
        const float biased = n*m_light_dir_local+0.3f;
        float diff = b3Min(b3Max(0.f, biased),1.f);
        //float diff = b3Max(0.f, n*m_light_dir_local);
        color = m_model->diffuse(uv)*diff;

        return false;
    }
// Runs one impulse pass over the four contact rows of `cs`, updating the
// velocities of both bodies in place.
//
//  cs                    contact constraint; m_appliedRambdaDt[] is updated
//  posA / posB           body positions, used to build the lever arms
//  linVelA.. angVelB     body velocities, modified in place
//  invMassA / invMassB   inverse masses
//  invInertiaA / B       inverse inertia matrices applied to the angular impulses
//  maxRambdaDt / min..   per-row upper / lower bounds of the accumulated impulse
static inline void b3SolveContact(b3ContactConstraint4& cs, 
	const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
	const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, 
	float maxRambdaDt[4], float minRambdaDt[4])
{
	for(int ic=0; ic<4; ic++)
	{
		// Skip rows with a zero inverse Jacobian coefficient: rambdaDt would
		// be scaled to zero by it below.
		if( cs.m_jacCoeffInv[ic] == 0.f ) continue;

		{
			b3Vector3 angular0, angular1, linear;
			// Lever arms from each body position to the contact point.
			b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
			b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
			b3SetLinearAndAngular( (const b3Vector3 &)-cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 );

			// Relative velocity along the constraint plus the bias term,
			// scaled by the inverse Jacobian coefficient.
			float rambdaDt = b3CalcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1,
				linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic];
			rambdaDt *= cs.m_jacCoeffInv[ic];

			{
				// Clamp the accumulated impulse to [min, max] and apply only
				// the difference to the previously applied amount.
				float prevSum = cs.m_appliedRambdaDt[ic];
				float updated = prevSum;
				updated += rambdaDt;
				updated = b3Max( updated, minRambdaDt[ic] );
				updated = b3Min( updated, maxRambdaDt[ic] );
				rambdaDt = updated - prevSum;
				cs.m_appliedRambdaDt[ic] = updated;
			}

			b3Vector3 linImp0 = invMassA*linear*rambdaDt;
			b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
			b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
			b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
#ifdef _WIN32
			// Sanity check: the linear impulses must be finite.
			b3Assert(_finite(linImp0.getX()));
			b3Assert(_finite(linImp1.getX()));
#endif
			{
				linVelA += linImp0;
				angVelA += angImp0;
				linVelB += linImp1;
				angVelB += angImp1;
			}
		}
	}
}
Example #7
0
	/// Returns the time in microseconds since the last call to reset or since
	/// the Clock was created.
	///
	/// The implementation is selected at compile time:
	///  - B3_USE_WINDOWS_TIMERS: QueryPerformanceCounter, cross-checked against
	///    GetTickCount to compensate for unexpected counter leaps;
	///  - __CELLOS_LV2__: the system timebase register;
	///  - otherwise: gettimeofday.
unsigned long int b3Clock::getTimeMicroseconds()
{
#ifdef B3_USE_WINDOWS_TIMERS
		LARGE_INTEGER currentTime;
		QueryPerformanceCounter(&currentTime);
		// Elapsed performance-counter ticks since the stored start time.
		LONGLONG elapsedTime = currentTime.QuadPart - 
			m_data->mStartTime.QuadPart;

		// Compute the number of millisecond ticks elapsed.
		unsigned long msecTicks = (unsigned long)(1000 * elapsedTime / 
			m_data->mClockFrequency.QuadPart);

		// Check for unexpected leaps in the Win32 performance counter.  
		// (This is caused by unexpected data across the PCI to ISA 
		// bridge, aka south bridge.  See Microsoft KB274323.)
		// The performance-counter time is compared with the GetTickCount
		// time; a difference of more than 100 ms either way is treated as a leap.
		unsigned long elapsedTicks = GetTickCount() - m_data->mStartTick;
		signed long msecOff = (signed long)(msecTicks - elapsedTicks);
		if (msecOff < -100 || msecOff > 100)
		{
			// Adjust the starting time forwards.
			// The adjustment is the smaller of the offset converted to counter
			// ticks and the counter ticks elapsed since the previous call.
			LONGLONG msecAdjustment = b3Min(msecOff * 
				m_data->mClockFrequency.QuadPart / 1000, elapsedTime - 
				m_data->mPrevElapsedTime);
			m_data->mStartTime.QuadPart += msecAdjustment;
			elapsedTime -= msecAdjustment;
		}

		// Store the current elapsed time for adjustments next time.
		m_data->mPrevElapsedTime = elapsedTime;

		// Convert to microseconds.
		// NOTE(review): the result is narrowed to unsigned long, which is
		// 32 bits wide on Windows, so the value presumably wraps after
		// roughly 71 minutes - confirm that callers tolerate this.
		unsigned long usecTicks = (unsigned long)(1000000 * elapsedTime / 
			m_data->mClockFrequency.QuadPart);

		return usecTicks;
#else

#ifdef __CELLOS_LV2__
		// Timebase frequency expressed in ticks per microsecond.
		uint64_t freq=sys_time_get_timebase_frequency();
		double dFreq=((double) freq)/ 1000000.0;
		typedef uint64_t  ClockSize;
		ClockSize newTime;
		//__asm __volatile__( "mftb %0" : "=r" (newTime) : : "memory");
		SYS_TIMEBASE_GET( newTime );

		// Ticks since the start time divided by ticks-per-microsecond.
		return (unsigned long int)((double(newTime-m_data->mStartTime)) / dFreq);
#else

		// Generic path: wall-clock difference from gettimeofday, combining
		// the seconds and microseconds fields.
		struct timeval currentTime;
		gettimeofday(&currentTime, 0);
		return (currentTime.tv_sec - m_data->mStartTime.tv_sec) * 1000000 + 
			(currentTime.tv_usec - m_data->mStartTime.tv_usec);
#endif//__CELLOS_LV2__
#endif 
}
Example #8
0
bool radixSortTest()
{
	TEST_INIT;
	
	int maxSize = 1024*256;

	b3AlignedObjectArray<b3SortData> buf0Host;
	buf0Host.resize(maxSize);
	b3AlignedObjectArray<b3SortData> buf1Host;
	buf1Host.resize(maxSize );
	b3OpenCLArray<b3SortData> buf2CL(g_context,g_queue,maxSize);

	b3RadixSort32CL* sort = new b3RadixSort32CL(g_context,g_device,g_queue,maxSize);

	int dx = maxSize/NUM_TESTS;
	for(int iter=0; iter<NUM_TESTS; iter++)
	{
		int size = b3Min( 128+dx*iter, maxSize-512 );
		size = NEXTMULTIPLEOF( size, 512 );//not necessary
		
		buf0Host.resize(size);

		for(int i=0; i<size; i++)
		{
			b3SortData v;
			v.m_key = getRandom(0,0xff);
			v.m_value = i;
			buf0Host[i] = v;
		}

		buf2CL.copyFromHost( buf0Host);
		

		sort->executeHost( buf0Host);
		sort->execute(buf2CL);

		buf2CL.copyToHost(buf1Host);
				
		for(int i=0; i<size; i++) 
		{
			TEST_ASSERT( buf0Host[i].m_value == buf1Host[i].m_value && buf0Host[i].m_key == buf1Host[i].m_key );
		}
	}

	delete sort;

	TEST_REPORT( "radixSort" );

	return g_testFailed;
}
// Runs one impulse pass over the four contact rows of `cs`. The body
// velocities are only read; the resulting changes are accumulated into the
// dLinVel*/dAngVel* outputs, and a body's deltas are left untouched when its
// inverse mass is zero.
//
//  cs                  constraint; m_appliedRambdaDt[] is updated
//  posAPtr / posBPtr   body positions, used to build the lever arms
//  linVelA.. angVelB   current body velocities (read only here)
//  invMassA / B        inverse masses
//  invInertiaA / B     inverse inertia matrices applied to the angular impulses
//  dLinVelA.. dAngVelB accumulated velocity deltas (read and written)
void solveContact3(b3GpuConstraint4* cs,
			b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
			b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
			b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB)
{
	// The accumulated impulse of every row is clamped to [0, FLT_MAX].
	const float lowerLimit = 0;
	const float upperLimit = FLT_MAX;

	for(int row=0; row<4; ++row)
	{
		// Rows with a zero inverse Jacobian coefficient are skipped.
		if( cs->m_jacCoeffInv[row] == 0.f ) continue;

		b3Vector3 angular0, angular1, linear;
		b3Vector3 armA = cs->m_worldPos[row] - *posAPtr;
		b3Vector3 armB = cs->m_worldPos[row] - *posBPtr;
		setLinearAndAngular( cs->m_linear, armA, armB, linear, angular0, angular1 );

		// Relative velocity uses the body velocities plus the deltas accumulated so far.
		float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, 
			*linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[row];
		rambdaDt *= cs->m_jacCoeffInv[row];

		// Clamp the accumulated impulse and keep only the change as this row's impulse.
		const float previous = cs->m_appliedRambdaDt[row];
		const float clamped = b3Min( b3Max( previous + rambdaDt, lowerLimit ), upperLimit );
		rambdaDt = clamped - previous;
		cs->m_appliedRambdaDt[row] = clamped;

		b3Vector3 linImp0 = invMassA*linear*rambdaDt;
		b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
		b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
		b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;

		if (invMassA != 0.f)
		{
			*dLinVelA += linImp0;
			*dAngVelA += angImp0;
		}
		if (invMassB != 0.f)
		{
			*dLinVelB += linImp1;
			*dAngVelB += angImp1;
		}
	}
}
Example #10
0
// Draws `numPoints` points with the lines shader, uploading them to the line
// vertex buffer in batches of at most MAX_POINTS_IN_BATCH.
//
//  positions           point data; each point starts with 3 floats
//  color               RGBA colour passed to the shader
//  numPoints           number of points; values <= 0 draw nothing
//  pointStrideInBytes  distance in bytes between consecutive points
//  pointDrawSize       value passed to glPointSize while drawing
void GLInstancingRenderer::drawPoints(const float* positions, const float color[4], int numPoints, int pointStrideInBytes, float pointDrawSize)
{

	glActiveTexture(GL_TEXTURE0);
	glBindTexture(GL_TEXTURE_2D,0);

	b3Assert(glGetError() ==GL_NO_ERROR);
	glUseProgram(linesShader);
	glUniformMatrix4fv(lines_ProjectionMatrix, 1, false, &m_data->m_projectionMatrix[0]);
	glUniformMatrix4fv(lines_ModelViewMatrix, 1, false, &m_data->m_viewMatrix[0]);
	glUniform4f(lines_colour,color[0],color[1],color[2],color[3]);

	glPointSize(pointDrawSize);
	glBindVertexArray(lineVertexArrayObject);

	glBindBuffer(GL_ARRAY_BUFFER, lineVertexBufferObject);

	int maxPointsInBatch = MAX_POINTS_IN_BATCH;
	int remainingPoints = numPoints;
	int offsetNumPoints= 0;
	// Bounded by the remaining count so that a negative numPoints cannot
	// produce negative batch sizes and a loop that never terminates.
	while (remainingPoints > 0)
	{
		const int curPointsInBatch = b3Min(maxPointsInBatch, remainingPoints);
		if (curPointsInBatch <= 0)
		{
			// Nothing can be drawn with a non-positive batch size.
			break;
		}

		// Upload this batch to the start of the buffer; the source pointer
		// advances by whole points (the stride expressed in floats).
		glBufferSubData(GL_ARRAY_BUFFER, 0, curPointsInBatch*pointStrideInBytes, positions + offsetNumPoints*(pointStrideInBytes / sizeof(float)));
		glEnableVertexAttribArray(0);
		// Only the first 3 floats of each point are used as the position.
		int numFloats = 3;// pointStrideInBytes / sizeof(float);
		glVertexAttribPointer(0, numFloats, GL_FLOAT, GL_FALSE, pointStrideInBytes, 0);
		glDrawArrays(GL_POINTS, 0, curPointsInBatch);
		remainingPoints -= curPointsInBatch;
		offsetNumPoints += curPointsInBatch;
	}

	// Restore the default vertex array, point size and program.
	glBindVertexArray(0);
	glPointSize(1);
	glUseProgram(0);
}
Example #11
0
void prefixScanTest()
{
	TEST_INIT;

	int maxSize = 1024*256;

	b3AlignedObjectArray<unsigned int> buf0Host;
	b3AlignedObjectArray<unsigned int> buf1Host;

	b3OpenCLArray<unsigned int> buf2CL(g_context,g_queue,maxSize);
	b3OpenCLArray<unsigned int> buf3CL(g_context,g_queue,maxSize);
	
	
	b3PrefixScanCL* scan = new b3PrefixScanCL(g_context,g_device,g_queue,maxSize);
		
	int dx = maxSize/NUM_TESTS;
	for(int iter=0; iter<NUM_TESTS; iter++)
	{
		int size = b3Min( 128+dx*iter, maxSize );
		buf0Host.resize(size);
		buf1Host.resize(size);

		for(int i=0; i<size; i++) 
			buf0Host[i] = 1;
		
		buf2CL.copyFromHost( buf0Host);
	
		unsigned int sumHost, sumGPU;

		scan->executeHost(buf0Host, buf1Host, size, &sumHost );
		scan->execute( buf2CL, buf3CL, size, &sumGPU );

		buf3CL.copyToHost(buf0Host);
		
		TEST_ASSERT( sumHost == sumGPU );
		for(int i=0; i<size; i++) 
			TEST_ASSERT( buf1Host[i] == buf0Host[i] );
	}

	delete scan;

	TEST_REPORT( "scanTest" );
}
Example #12
0
// Fills an int buffer with a constant on the OpenCL device and on the host
// for NUM_TESTS different sizes, and asserts that the first `size` elements
// of both results agree.
inline void fillIntTest()
{
	TEST_INIT;

	b3FillCL* fillCL = new b3FillCL(g_context,g_device,g_queue);
	int maxSize=1024*256;
	b3OpenCLArray<int> intBuffer(g_context,g_queue,maxSize);
	intBuffer.resize(maxSize);
	
	// Number of sizes exercised by this test. Kept as a macro at this exact
	// position because it remains defined for the code that follows.
#define NUM_TESTS 7

	int dx = maxSize/NUM_TESTS;
	for (int iter=0;iter<NUM_TESTS;iter++)
	{
		int size = b3Min( 11+dx*iter, maxSize );

		int value = 2;

		// Fill the first `size` elements on the device...
		int offset=0;
		fillCL->execute(intBuffer,value,size,offset);

		// ...and the same range of a fresh host buffer as the reference.
		b3AlignedObjectArray<int> hostBuf2;
		hostBuf2.resize(size);
		fillCL->executeHost(hostBuf2,value,size,offset);

		b3AlignedObjectArray<int> hostBuf;
		intBuffer.copyToHost(hostBuf);

		// Device and host results must match over the filled range.
		for(int i=0; i<size; i++)
		{
			TEST_ASSERT( hostBuf[i] == hostBuf2[i] );
		}
	}

	delete fillCL;

	TEST_REPORT( "fillIntTest" );
}
Example #13
0
// Solves the two friction rows of `cs` along two tangent directions
// perpendicular to the contact normal, then applies a small angular damping
// about the normal under the condition described below.
//
//  cs                   constraint; m_fAppliedRambdaDt[] is updated
//  posA / posB          body positions
//  linVelA.. angVelB    body velocities, modified in place
//  invMassA / invMassB  inverse masses
//  invInertiaA / B      inverse inertia matrices applied to the angular impulses
//  maxRambdaDt / min..  per-row upper / lower bounds of the accumulated
//                       impulse (only entries 0 and 1 are read)
static
__inline
void solveFriction(b3GpuConstraint4& cs,
                   const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
                   const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
                   float maxRambdaDt[4], float minRambdaDt[4])
{

    // Nothing to do only when BOTH friction rows have a zero coefficient.
    // (Both operands previously tested index 0, so a valid second row was
    // skipped whenever the first coefficient was zero.)
    if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[1] == 0 ) return;
    const b3Vector3& center = (const b3Vector3&)cs.m_center;

    b3Vector3 n = -(const b3Vector3&)cs.m_linear;

    // Two tangent directions derived from n.
    b3Vector3 tangent[2];
#if 1
    b3PlaneSpace1 (n, tangent[0],tangent[1]);
#else
    b3Vector3 r = cs.m_worldPos[0]-center;
    tangent[0] = cross3( n, r );
    tangent[1] = cross3( tangent[0], n );
    tangent[0] = normalize3( tangent[0] );
    tangent[1] = normalize3( tangent[1] );
#endif

    b3Vector3 angular0, angular1, linear;
    // Lever arms from each body position to the constraint center.
    b3Vector3 r0 = center - posA;
    b3Vector3 r1 = center - posB;
    for(int i=0; i<2; i++)
    {
        setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 );
        float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
                                    linVelA, angVelA, linVelB, angVelB );
        rambdaDt *= cs.m_fJacCoeffInv[i];

        {
            // Clamp the accumulated impulse to [min, max] and apply only the
            // difference to the previously applied amount.
            float prevSum = cs.m_fAppliedRambdaDt[i];
            float updated = prevSum;
            updated += rambdaDt;
            updated = b3Max( updated, minRambdaDt[i] );
            updated = b3Min( updated, maxRambdaDt[i] );
            rambdaDt = updated - prevSum;
            cs.m_fAppliedRambdaDt[i] = updated;
        }

        b3Vector3 linImp0 = invMassA*linear*rambdaDt;
        b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
        b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
        b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
#ifdef _WIN32
        // Sanity check: the linear impulses must be finite.
        b3Assert(_finite(linImp0.getX()));
        b3Assert(_finite(linImp1.getX()));
#endif
        linVelA += linImp0;
        angVelA += angImp0;
        linVelB += linImp1;
        angVelB += angImp1;
    }

    {   //	angular damping for point constraint
        // Applied when the direction A->B is nearly aligned with the
        // direction A->center (dot > 0.95), or when either inverse mass is zero.
        b3Vector3 ab = ( posB - posA ).normalized();
        b3Vector3 ac = ( center - posA ).normalized();
        if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
        {
            float angNA = b3Dot( n, angVelA );
            float angNB = b3Dot( n, angVelB );

            // Remove 10% of each angular velocity component along n.
            angVelA -= (angNA*0.1f)*n;
            angVelB -= (angNB*0.1f)*n;
        }
    }

}
// One broad-phase step:
//  1. incrementally optimizes both trees (m_sets[0] and m_sets[1]);
//  2. moves the proxies of the stage that is now current from m_sets[0]
//     into m_sets[1];
//  3. runs the tree-vs-tree collision queries when m_deferedcollide is set;
//  4. when cleanup was requested, removes cached pairs whose leaf volumes
//     no longer intersect;
//  5. updates the per-step counters and the update ratio.
void b3DynamicBvhBroadphase::collide(b3Dispatcher* dispatcher)
{
	/*printf("---------------------------------------------------------\n");
	printf("m_sets[0].m_leaves=%d\n",m_sets[0].m_leaves);
	printf("m_sets[1].m_leaves=%d\n",m_sets[1].m_leaves);
	printf("numPairs = %d\n",getOverlappingPairCache()->getNumOverlappingPairs());
	{
		int i;
		for (i=0;i<getOverlappingPairCache()->getNumOverlappingPairs();i++)
		{
			printf("pair[%d]=(%d,%d),",i,getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy0->getUid(),
				getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy1->getUid());
		}
		printf("\n");
	}
*/



	b3SPC(m_profiling.m_total);
	/* optimize				*/ 
	// The number of passes is 1 plus a percentage (m_dupdates / m_fupdates)
	// of the leaf count of the respective tree.
	m_sets[0].optimizeIncremental(1+(m_sets[0].m_leaves*m_dupdates)/100);
	if(m_fixedleft)
	{
		const int count=1+(m_sets[1].m_leaves*m_fupdates)/100;
		m_sets[1].optimizeIncremental(count);
		m_fixedleft=b3Max<int>(0,m_fixedleft-count);
	}
	/* dynamic -> fixed set	*/ 
	m_stageCurrent=(m_stageCurrent+1)%STAGECOUNT;
	b3DbvtProxy*	current=m_stageRoots[m_stageCurrent];
	if(current)
	{
		b3DbvtTreeCollider	collider(this);
		do	{
			// Fetch the successor first: the list operations below relink `current`.
			b3DbvtProxy*	next=current->links[1];
			b3ListRemove(current,m_stageRoots[current->stage]);
			b3ListAppend(current,m_stageRoots[STAGECOUNT]);
#if B3_DBVT_BP_ACCURATESLEEPING
			m_paircache->removeOverlappingPairsContainingProxy(current,dispatcher);
			collider.proxy=current;
			b3DynamicBvh::collideTV(m_sets[0].m_root,current->aabb,collider);
			b3DynamicBvh::collideTV(m_sets[1].m_root,current->aabb,collider);
#endif
			// Move the leaf from set 0 to set 1 with a volume rebuilt from
			// the proxy's min/max bounds.
			m_sets[0].remove(current->leaf);
			B3_ATTRIBUTE_ALIGNED16(b3DbvtVolume)	curAabb=b3DbvtVolume::FromMM(current->m_aabbMin,current->m_aabbMax);
			current->leaf	=	m_sets[1].insert(curAabb,current);
			current->stage	=	STAGECOUNT;	
			current			=	next;
		} while(current);
		m_fixedleft=m_sets[1].m_leaves;
		m_needcleanup=true;
	}
	/* collide dynamics		*/ 
	{
		b3DbvtTreeCollider	collider(this);
		// Both queries are guarded by the same m_deferedcollide flag.
		if(m_deferedcollide)
		{
			// Set 0 against set 1.
			b3SPC(m_profiling.m_fdcollide);
			m_sets[0].collideTTpersistentStack(m_sets[0].m_root,m_sets[1].m_root,collider);
		}
		if(m_deferedcollide)
		{
			// Set 0 against itself.
			b3SPC(m_profiling.m_ddcollide);
			m_sets[0].collideTTpersistentStack(m_sets[0].m_root,m_sets[0].m_root,collider);
		}
	}
	/* clean up				*/ 
	if(m_needcleanup)
	{
		b3SPC(m_profiling.m_cleanup);
		b3BroadphasePairArray&	pairs=m_paircache->getOverlappingPairArray();
		if(pairs.size()>0)
		{

			// Check at most max(m_newpairs, m_cupdates percent of the pairs)
			// entries, starting at the rotating cursor m_cid.
			int			ni=b3Min(pairs.size(),b3Max<int>(m_newpairs,(pairs.size()*m_cupdates)/100));
			for(int i=0;i<ni;++i)
			{
				b3BroadphasePair&	p=pairs[(m_cid+i)%pairs.size()];
				b3DbvtProxy*		pa=&m_proxies[p.x];
				b3DbvtProxy*		pb=&m_proxies[p.y];
				if(!b3Intersect(pa->leaf->volume,pb->leaf->volume))
				{
#if B3_DBVT_BP_SORTPAIRS
					if(pa->m_uniqueId>pb->m_uniqueId) 
						b3Swap(pa,pb);
#endif
					m_paircache->removeOverlappingPair(pa->getUid(),pb->getUid(),dispatcher);
					// Step back so the same slot is examined again;
					// presumably the removal moves another pair into it.
					--ni;--i;
				}
			}
			// Advance the cursor for the next call, wrapping around.
			if(pairs.size()>0) m_cid=(m_cid+ni)%pairs.size(); else m_cid=0;
		}
	}
	++m_pid;
	m_newpairs=1;
	m_needcleanup=false;
	// Ratio of updates done to update calls, then halve both counters.
	if(m_updates_call>0)
	{ m_updates_ratio=m_updates_done/(b3Scalar)m_updates_call; }
	else
	{ m_updates_ratio=0; }
	m_updates_done/=2;
	m_updates_call/=2;
}
Example #15
0
void b3Island::Solve(const b3Vec3& gravityDir) {
	r32 h = dt;
	b3Vec3 gravityForce = B3_GRAVITY_ACC * gravityDir;

	// Integrate velocities.
	for (u32 i = 0; i < bodyCount; ++i) {
		b3Body* b = bodies[i];

		b3Vec3 v = b->m_linearVelocity;
		b3Vec3 w = b->m_angularVelocity;
		b3Vec3 x = b->m_worldCenter;
		b3Quaternion q = b->m_orientation;

		if (b->m_type == e_dynamicBody) {
			// Use semi-implitic Euler.
			b3Vec3 force = b->m_gravityScale * gravityForce + b->m_force;
			v += (h * b->m_invMass) * force;
			w += h * (b->m_invWorldInertia * b->m_torque);

			// References: Box2D.
			// Apply damping.
			// ODE: dv/dt + c * v = 0
			// Solution: v(t) = v0 * exp(-c * t)
			// Time step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt)
			// v2 = exp(-c * dt) * v1
			// Pade approximation:
			// v2 = v1 * 1 / (1 + c * dt)
			v *= B3_ONE / (B3_ONE + h * r32(0.1));
			w *= B3_ONE / (B3_ONE + h * r32(0.1));
		}

		velocities[i].v = v;
		velocities[i].w = w;
		positions[i].x = x;
		positions[i].q = q;
	}

	b3JointSolverDef jointSolverDef;
	jointSolverDef.dt = h;
	jointSolverDef.joints = joints;
	jointSolverDef.count = jointCount;
	jointSolverDef.positions = positions;
	jointSolverDef.velocities = velocities;

	b3JointSolver jointSolver(&jointSolverDef);
	jointSolver.InitializeVelocityConstraints();

	b3ContactSolverDef contactSolverDef;
	contactSolverDef.dt = h;
	contactSolverDef.contacts = contacts;
	contactSolverDef.count = contactCount;
	contactSolverDef.positions = positions;
	contactSolverDef.velocities = velocities;
	contactSolverDef.allocator = allocator;

	b3ContactSolver contactSolver(&contactSolverDef);
	contactSolver.InitializeVelocityConstraints();
	
	jointSolver.WarmStart();
	contactSolver.WarmStart();
	
	// Solve velocity constraints.
	for (u32 i = 0; i < velocityIterations; ++i) {
		jointSolver.SolveVelocityConstraints();
		contactSolver.SolveVelocityConstraints();
	}

	contactSolver.StoreImpulses();

	for (u32 i = 0; i < bodyCount; ++i) {
		b3Body* b = bodies[i];
		if (b->m_type == e_staticBody) {
			continue;
		}

		b3Vec3 x = positions[i].x;
		b3Quaternion q1 = positions[i].q;
		b3Vec3 v = velocities[i].v;
		b3Vec3 w = velocities[i].w;
		
		x += h * v;
		
		b3Quaternion q2 = Integrate(q1, w, h);

		positions[i].x = x;
		positions[i].q = q2;
		velocities[i].v = v;
		velocities[i].w = w;
	}

	for (u32 i = 0; i < bodyCount; ++i) {
		b3Body* b = bodies[i];
		if (b->m_type == e_staticBody) {
			continue;
		}

		b->m_worldCenter = positions[i].x;
		b->m_orientation = positions[i].q;
		b->m_linearVelocity = velocities[i].v;
		b->m_angularVelocity = velocities[i].w;
	}

	if (allowSleep) {
		r32 minSleepTime = B3_MAX_FLOAT;
		for (u32 i = 0; i < bodyCount; ++i) {
			b3Body* b = bodies[i];
			if (b->m_type == e_staticBody) {
				continue;
			}

			// Compute the linear and angular speed of the body.
			const r32 sqrLinVel = b3LenSq(b->m_linearVelocity);
			const r32 sqrAngVel = b3LenSq(b->m_angularVelocity);

			if (sqrLinVel > B3_SLEEP_LINEAR_TOL || sqrAngVel > B3_SLEEP_ANGULAR_TOL) {
				minSleepTime = B3_ZERO;
				b->m_sleepTime = B3_ZERO;
			}
			else {
				b->m_sleepTime += h;
				minSleepTime = b3Min(minSleepTime, b->m_sleepTime);
			}
		}

		// Put the island to sleep so long as the minimum found sleep time
		// is below the threshold. 
		if (minSleepTime >= B3_TIME_TO_SLEEP) {
			for (u32 i = 0; i < bodyCount; ++i) {
				bodies[i]->SetAwake(false);
			}
		}
	}
}
Example #16
0
void boundSearchTest( )
{
	TEST_INIT;

	int maxSize = 1024*256;
	int bucketSize = 256;

	b3OpenCLArray<b3SortData> srcCL(g_context,g_queue,maxSize);
	b3OpenCLArray<unsigned int> upperCL(g_context,g_queue,maxSize);
	b3OpenCLArray<unsigned int> lowerCL(g_context,g_queue,maxSize);
	
	b3AlignedObjectArray<b3SortData> srcHost;
	b3AlignedObjectArray<unsigned int> upperHost;
	b3AlignedObjectArray<unsigned int> lowerHost;
	b3AlignedObjectArray<unsigned int> upperHostCompare;
	b3AlignedObjectArray<unsigned int> lowerHostCompare;
	
	b3BoundSearchCL* search = new b3BoundSearchCL(g_context,g_device,g_queue, maxSize);


	int dx = maxSize/NUM_TESTS;
	for(int iter=0; iter<NUM_TESTS; iter++)
	{
		
		int size = b3Min( 128+dx*iter, maxSize );

		upperHost.resize(bucketSize);
		lowerHost.resize(bucketSize);
		upperHostCompare.resize(bucketSize);
		lowerHostCompare.resize(bucketSize);

		srcHost.resize(size);

		for(int i=0; i<size; i++) 
		{
			b3SortData v;
//			v.m_key = i<2? 0 : 5;
			v.m_key = getRandom(0,bucketSize);

			v.m_value = i;
			srcHost.at(i) = v;
		}

		srcHost.quickSort(b3SortDataCompare());
		srcCL.copyFromHost(srcHost);

		{
			
			for(int i=0; i<bucketSize; i++) 
			{
				lowerHost[i] = -1;
				lowerHostCompare[i] = -1;
				upperHost[i] = -1;
				upperHostCompare[i] = -1;
			}
			upperCL.copyFromHost(upperHost);
			lowerCL.copyFromHost(lowerHost);
		}

		search->execute(srcCL,size,upperCL,bucketSize,b3BoundSearchCL::BOUND_UPPER);
		search->execute(srcCL,size,lowerCL,bucketSize,b3BoundSearchCL::BOUND_LOWER);

		search->executeHost(srcHost,size,upperHostCompare,bucketSize,b3BoundSearchCL::BOUND_UPPER);
		search->executeHost(srcHost,size,lowerHostCompare,bucketSize,b3BoundSearchCL::BOUND_LOWER);

		lowerCL.copyToHost(lowerHost);
		upperCL.copyToHost(upperHost);
		for(int i=0; i<bucketSize; i++)
		{
			TEST_ASSERT(upperHostCompare[i] == upperHost[i]);
			TEST_ASSERT(lowerHostCompare[i] == lowerHost[i]);
		}
		/*
		for(int i=1; i<bucketSize; i++)
		{
			int lhi_1 = lowerHost[i-1];
			int lhi = lowerHost[i];

			for(int j=lhi_1; j<lhi; j++)
			//for(int j=lowerHost[i-1]; j<lowerHost[i]; j++)
			{
				TEST_ASSERT( srcHost[j].m_key < i );
			}
		}

		for(int i=0; i<bucketSize; i++)
		{
			int jMin = (i==0)?0:upperHost[i-1];
			for(int j=jMin; j<upperHost[i]; j++)
			{
				TEST_ASSERT( srcHost[j].m_key <= i );
			}
		}
		*/


		for(int i=0; i<bucketSize; i++)
		{
			int lhi = lowerHost[i];
			int uhi = upperHost[i];

			for(int j=lhi; j<uhi; j++)
			{
				if ( srcHost[j].m_key != i )
				{
					printf("error %d != %d\n",srcHost[j].m_key,i);
				}
				TEST_ASSERT( srcHost[j].m_key == i );
			}
		}

	}

	delete search;

	TEST_REPORT( "boundSearchTest" );
}
Example #17
0
void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */)
{
	
	int originalSize = keyValuesInOut.size();
	int workingSize = originalSize;
	
			
	int dataAlignment = DATA_ALIGNMENT;

#ifdef DEBUG_RADIXSORT2
    b3AlignedObjectArray<b3SortData>   test2;
    keyValuesInOut.copyToHost(test2);
    printf("numElem = %d\n",test2.size());
    for (int i=0;i<test2.size();i++)
    {
        printf("test2[%d].m_key=%d\n",i,test2[i].m_key);
        printf("test2[%d].m_value=%d\n",i,test2[i].m_value);
    }
#endif //DEBUG_RADIXSORT2
    
	b3OpenCLArray<b3SortData>* src = 0;

	if (workingSize%dataAlignment)
	{
		workingSize += dataAlignment-(workingSize%dataAlignment);
		m_workBuffer4->copyFromOpenCLArray(keyValuesInOut);
		m_workBuffer4->resize(workingSize);
		b3SortData fillValue;
		fillValue.m_key = 0xffffffff;
		fillValue.m_value = 0xffffffff;

#define USE_BTFILL
#ifdef USE_BTFILL
		m_fill->execute((b3OpenCLArray<b3Int2>&)*m_workBuffer4,(b3Int2&)fillValue,workingSize-originalSize,originalSize);
#else
		//fill the remaining bits (very slow way, todo: fill on GPU/OpenCL side)
		
		for (int i=originalSize; i<workingSize;i++)
		{
			m_workBuffer4->copyFromHostPointer(&fillValue,1,i);
		}
#endif//USE_BTFILL

		src = m_workBuffer4;
	} else
	{
		src = &keyValuesInOut;
		m_workBuffer4->resize(0);
	}
		
	b3Assert( workingSize%DATA_ALIGNMENT == 0 );
	int minCap = NUM_BUCKET*NUM_WGS;


	int n = workingSize;

	m_workBuffer1->resize(minCap);
	m_workBuffer3->resize(workingSize);
	

//	ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 );
	b3Assert( BITS_PER_PASS == 4 );
	b3Assert( WG_SIZE == 64 );
	b3Assert( (sortBits&0x3) == 0 );

	
	
	b3OpenCLArray<b3SortData>* dst = m_workBuffer3;

	b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1;
	b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2;


	int nWGs = NUM_WGS;
	b3ConstData cdata;

	{
        int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256
     	int nBlocks = (n+blockSize-1)/(blockSize);
		cdata.m_n = n;
		cdata.m_nWGs = NUM_WGS;
		cdata.m_startBit = 0;
		cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs;
		if( nBlocks < NUM_WGS )
		{
			cdata.m_nBlocksPerWG = 1;
			nWGs = nBlocks;
		}
	}

	int count=0;
	for(int ib=0; ib<sortBits; ib+=4)
	{
#ifdef DEBUG_RADIXSORT2
        keyValuesInOut.copyToHost(test2);
        printf("numElem = %d\n",test2.size());
        for (int i=0;i<test2.size();i++)
        {
            if (test2[i].m_key != test2[i].m_value)
            {
                printf("test2[%d].m_key=%d\n",i,test2[i].m_key);
                printf("test2[%d].m_value=%d\n",i,test2[i].m_value);
            }
        }
#endif //DEBUG_RADIXSORT2
        
		cdata.m_startBit = ib;
		
		if (src->size())
		{
			b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) };
			b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel);

			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(  cdata );
			
			int num = NUM_WGS*WG_SIZE;
			launcher.launch1D( num, WG_SIZE );
		}

        
        
#ifdef DEBUG_RADIXSORT
		b3AlignedObjectArray<unsigned int> testHist;
		srcHisto->copyToHost(testHist);
		printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size());
		for (int i=0;i<testHist.size();i++)
		{
			if (testHist[i]!=0)
				printf("testHist[%d]=%d\n",i,testHist[i]);
		}
#endif //DEBUG_RADIXSORT
	
	

//fast prefix scan is not working properly on Mac OSX yet
#ifdef _WIN32
	bool fastScan=!m_deviceCPU;//only use fast scan on GPU
#else
	bool fastScan=false;
#endif

		if (fastScan)
		{//	prefix scan group histogram
			b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) };
			b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(  cdata );
			launcher.launch1D( 128, 128 );
			destHisto = srcHisto;
		}else
		{
			//unsigned int sum; //for debugging
            m_scan->execute(*srcHisto,*destHisto,1920,0);//,&sum);
		}


#ifdef DEBUG_RADIXSORT
		destHisto->copyToHost(testHist);
		printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size());
		for (int i=0;i<testHist.size();i++)
		{
			if (testHist[i]!=0)
				printf("testHist[%d]=%d\n",i,testHist[i]);
		}
        
        for (int i=0;i<testHist.size();i+=NUM_WGS)
		{
				printf("testHist[%d]=%d\n",i/NUM_WGS,testHist[i]);
		}

#endif //DEBUG_RADIXSORT

#define USE_GPU
#ifdef USE_GPU
        
		if (src->size())
		{//	local sort and distribute
			b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )};
			b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(  cdata );
			launcher.launch1D( nWGs*WG_SIZE, WG_SIZE );
            
		}
#else
        {
#define NUM_TABLES 16
//#define SEQUENTIAL
#ifdef SEQUENTIAL
            int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
            int tables[NUM_TABLES];
            int startBit = ib;
            
            destHisto->copyToHost(testHist);
            b3AlignedObjectArray<b3SortData> srcHost;
            b3AlignedObjectArray<b3SortData> dstHost;
            dstHost.resize(src->size());
            
            src->copyToHost(srcHost);
            
            for (int i=0;i<NUM_TABLES;i++)
            {
                tables[i] = testHist[i*NUM_WGS];
            }
            
            //	distribute
            for(int i=0; i<n; i++)
            {
                int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1);
                
                dstHost[tables[tableIdx] + counter2[tableIdx]] = srcHost[i];
                counter2[tableIdx] ++;
            }
            
            
#else
          
            int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
            
            int tables[NUM_TABLES];
             b3AlignedObjectArray<b3SortData> dstHostOK;
            dstHostOK.resize(src->size());

            destHisto->copyToHost(testHist);
            b3AlignedObjectArray<b3SortData> srcHost;
            src->copyToHost(srcHost);
        
            int blockSize = 256;
            int nBlocksPerWG = cdata.m_nBlocksPerWG;
            int startBit = ib;

            {
                for (int i=0;i<NUM_TABLES;i++)
                {
                    tables[i] = testHist[i*NUM_WGS];
                }
                
                //	distribute
                for(int i=0; i<n; i++)
                {
                    int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1);
                    
                    dstHostOK[tables[tableIdx] + counter2[tableIdx]] = srcHost[i];
                    counter2[tableIdx] ++;
                }

            
            }
            
            
            b3AlignedObjectArray<b3SortData> dstHost;
            dstHost.resize(src->size());
            
            
            int counter[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
            
            
            
            for (int wgIdx=0;wgIdx<NUM_WGS;wgIdx++)
            {
              int counter[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};

              int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;
                
              for(int iblock=0; iblock<b3Min(cdata.m_nBlocksPerWG, nBlocks); iblock++)
              {
                for (int lIdx = 0;lIdx < 64;lIdx++)
                {
                    int addr = iblock*blockSize + blockSize*cdata.m_nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;
                    
                    //	MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD
                    //	Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops
                    //	AMD: AtomInc performs better while NV prefers ++
                    for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++)
                    {
                        if( addr+j < n )
                        {
                          //  printf ("addr+j=%d\n", addr+j);
                            
                            int i = addr+j;
                            
                            int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1);
                            
                            int destIndex = testHist[tableIdx*NUM_WGS+wgIdx] + counter[tableIdx];
                            
                            b3SortData ok = dstHostOK[destIndex];
                                                    
                            if (ok.m_key != srcHost[i].m_key)
                            {
                                printf("ok.m_key = %d, srcHost[i].m_key = %d\n", ok.m_key,srcHost[i].m_key );
                                printf("(ok.m_value = %d, srcHost[i].m_value = %d)\n", ok.m_value,srcHost[i].m_value );
                            }
                            if (ok.m_value != srcHost[i].m_value)
                            {
                                
                               printf("ok.m_value = %d, srcHost[i].m_value = %d\n", ok.m_value,srcHost[i].m_value );
                                printf("(ok.m_key = %d, srcHost[i].m_key = %d)\n", ok.m_key,srcHost[i].m_key );

                            }
                   
                            dstHost[destIndex] = srcHost[i];
                            counter[tableIdx] ++;
                            
                        }
                    }
                }
              }
            }
            
         
#endif //SEQUENTIAL
            
            dst->copyFromHost(dstHost);
        }
#endif//USE_GPU
        
        
        
#ifdef DEBUG_RADIXSORT
		destHisto->copyToHost(testHist);
		printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size());
		for (int i=0;i<testHist.size();i++)
		{
			if (testHist[i]!=0)
				printf("testHist[%d]=%d\n",i,testHist[i]);
		}
#endif //DEBUG_RADIXSORT
		b3Swap(src, dst );
		b3Swap(srcHisto,destHisto);

#ifdef DEBUG_RADIXSORT2
        keyValuesInOut.copyToHost(test2);
        printf("numElem = %d\n",test2.size());
        for (int i=0;i<test2.size();i++)
        {
            if (test2[i].m_key != test2[i].m_value)
            {
                printf("test2[%d].m_key=%d\n",i,test2[i].m_key);
                printf("test2[%d].m_value=%d\n",i,test2[i].m_value);
            }
        }
#endif //DEBUG_RADIXSORT2
        
        count++;
                
        
	}
Example #18
0
// Advances every body of this island by one time step and solves the island's
// joint and contact constraints.
//
// gravity            - acceleration added to dynamic bodies, scaled per body by m_gravityScale.
// dt                 - length of the time step.
// velocityIterations - number of velocity constraint passes (joints first, then contacts).
// positionIterations - upper bound on position constraint passes; the loop exits early
//                      once both solvers report that they are solved.
// flags              - bit mask. e_warmStartBit enables warm starting and storing of the
//                      contact impulses, e_sleepBit enables the sleep test at the end.
//
// The solver works on the island's state buffers (m_positions, m_velocities,
// m_invInertias) and writes the result back to the bodies in step 6.
void b3Island::Solve(const b3Vec3& gravity, float32 dt, u32 velocityIterations, u32 positionIterations, u32 flags)
{
	float32 h = dt;

	// 1. Integrate velocities
	for (u32 i = 0; i < m_bodyCount; ++i) 
	{
		b3Body* b = m_bodies[i];

		b3Vec3 v = b->m_linearVelocity;
		b3Vec3 w = b->m_angularVelocity;
		b3Vec3 x = b->m_sweep.worldCenter;
		b3Quat q = b->m_sweep.orientation;

		// Remember the positions for CCD
		b->m_sweep.worldCenter0 = b->m_sweep.worldCenter;
		b->m_sweep.orientation0 = b->m_sweep.orientation;

		// Only dynamic bodies receive forces, torques and damping; every other
		// body type keeps its current velocity.
		if (b->m_type == e_dynamicBody) 
		{
			// Integrate forces
			v += h * (b->m_gravityScale * gravity + b->m_invMass * b->m_force);
			
			// Clear forces
			b->m_force.SetZero();
			
			// Integrate torques
			
			// Superposition Principle
			// w2 - w1 = dw1 + dw2 
			// w2 - w1 = h * I^-1 * bt + h * I^-1 * -gt
			// w2 = w1 + dw1 + dw2
						
			// Explicit Euler on current inertia and applied torque
			// w2 = w1 + h * I1^-1 * bt1
			b3Vec3 dw1 = h * b->m_worldInvI * b->m_torque;
			
			// Implicit Euler on next inertia and angular velocity
			// w2 = w1 - h * I2^-1 * cross(w2, I2 * w2)
			// w2 - w1 = -I2^-1 * h * cross(w2, I2 * w2)
			// I2 * (w2 - w1) = -h * cross(w2, I2 * w2)
			// I2 * (w2 - w1) + h * cross(w2, I2 * w2) = 0
			// Toss out I2 from f using local I2 (constant) and local w1 
			// to remove its time dependency.
			b3Vec3 w2 = b3SolveGyro(q, b->m_I, w, h);
			b3Vec3 dw2 = w2 - w;

			w += dw1 + dw2;
			
			// Clear torques
			b->m_torque.SetZero();
			
			// Apply local damping.
			// ODE: dv/dt + c * v = 0
			// Solution: v(t) = v0 * exp(-c * t)
			// Step: v(t + dt) = v0 * exp(-c * (t + dt)) = v0 * exp(-c * t) * exp(-c * dt) = v * exp(-c * dt)
			// v2 = exp(-c * dt) * v1
			// Padé approximation:
			// 1 / (1 + c * dt) 
			v *= 1.0f / (1.0f + h * b->m_linearDamping);
			w *= 1.0f / (1.0f + h * b->m_angularDamping);
		}

		// Publish the body state to the solver buffers.
		m_velocities[i].v = v;
		m_velocities[i].w = w;
		m_positions[i].x = x;
		m_positions[i].q = q;
		m_invInertias[i] = b->m_worldInvI;
	}

	// Both solvers share the same position, velocity and inverse inertia buffers.
	b3JointSolverDef jointSolverDef;
	jointSolverDef.joints = m_joints;
	jointSolverDef.count = m_jointCount;
	jointSolverDef.positions = m_positions;
	jointSolverDef.velocities = m_velocities;
	jointSolverDef.invInertias = m_invInertias;
	jointSolverDef.dt = h;
	b3JointSolver jointSolver(&jointSolverDef);

	b3ContactSolverDef contactSolverDef;
	contactSolverDef.allocator = m_allocator;
	contactSolverDef.contacts = m_contacts;
	contactSolverDef.count = m_contactCount;
	contactSolverDef.positions = m_positions;
	contactSolverDef.velocities = m_velocities;
	contactSolverDef.invInertias = m_invInertias;
	contactSolverDef.dt = h;
	b3ContactSolver contactSolver(&contactSolverDef);

	// 2. Initialize constraints
	{
		B3_PROFILE("Initialize Constraints");
		
		contactSolver.InitializeConstraints();

		if (flags & e_warmStartBit)
		{
			contactSolver.WarmStart();
		}

		jointSolver.InitializeConstraints();

		if (flags & e_warmStartBit)
		{
			jointSolver.WarmStart();
		}
	}

	// 3. Solve velocity constraints
	{
		B3_PROFILE("Solve Velocity Constraints");

		for (u32 i = 0; i < velocityIterations; ++i)
		{
			jointSolver.SolveVelocityConstraints();
			contactSolver.SolveVelocityConstraints();
		}

		if (flags & e_warmStartBit)
		{
			contactSolver.StoreImpulses();
		}
	}

	// 4. Integrate positions
	for (u32 i = 0; i < m_bodyCount; ++i) 
	{
		b3Body* b = m_bodies[i];
		
		b3Vec3 x = m_positions[i].x;
		b3Quat q = m_positions[i].q;
		b3Vec3 v = m_velocities[i].v;
		b3Vec3 w = m_velocities[i].w;

		// Prevent numerical instability due to large velocity changes.		
		// The velocity is rescaled so that the step's translation has length B3_MAX_TRANSLATION.
		b3Vec3 translation = h * v;
		if (b3Dot(translation, translation) > B3_MAX_TRANSLATION_SQUARED)
		{
			float32 ratio = B3_MAX_TRANSLATION / b3Length(translation);
			v *= ratio;
		}

		// Same clamp for the rotation performed during this step.
		b3Vec3 rotation = h * w;
		if (b3Dot(rotation, rotation) > B3_MAX_ROTATION_SQUARED)
		{
			float32 ratio = B3_MAX_ROTATION / b3Length(rotation);
			w *= ratio;
		}

		// Integrate
		x += h * v;
		q = b3Integrate(q, w, h);

		m_positions[i].x = x;
		m_positions[i].q = q;
		m_velocities[i].v = v;
		m_velocities[i].w = w;
		// Recompute the inverse inertia from the body's local m_invI and the new orientation.
		m_invInertias[i] = b3RotateToFrame(b->m_invI, q);
	}

	// 5. Solve position constraints
	{
		B3_PROFILE("Solve Position Constraints");
		
		for (u32 i = 0; i < positionIterations; ++i) 
		{
			bool contactsSolved = contactSolver.SolvePositionConstraints();
			bool jointsSolved = jointSolver.SolvePositionConstraints();
			if (contactsSolved && jointsSolved)
			{
				// Early out if the position errors are small.
				break;
			}
		}
	}

	// 6. Copy state buffers back to the bodies
	for (u32 i = 0; i < m_bodyCount; ++i) 
	{
		b3Body* b = m_bodies[i];
		b->m_sweep.worldCenter = m_positions[i].x;
		b->m_sweep.orientation = m_positions[i].q;
		b->m_sweep.orientation.Normalize();
		b->m_linearVelocity = m_velocities[i].v;
		b->m_angularVelocity = m_velocities[i].w;	
		b->m_worldInvI = m_invInertias[i];
		
		b->SynchronizeTransform();
	}

	// 7. Put bodies under negligible motion to sleep
	if (flags & e_sleepBit) 
	{
		float32 minSleepTime = B3_MAX_FLOAT;
		for (u32 i = 0; i < m_bodyCount; ++i) 
		{
			b3Body* b = m_bodies[i];
			if (b->m_type == e_staticBody) 
			{
				// Static bodies do not take part in the sleep test.
				continue;
			}

			// Compute the squared linear and angular speed of the body.
			float32 sqrLinVel = b3Dot(b->m_linearVelocity, b->m_linearVelocity);
			float32 sqrAngVel = b3Dot(b->m_angularVelocity, b->m_angularVelocity);

			if (sqrLinVel > B3_SLEEP_LINEAR_TOL || sqrAngVel > B3_SLEEP_ANGULAR_TOL) 
			{
				// The body is still moving: restart its timer and keep the island awake.
				minSleepTime = 0.0f;
				b->m_sleepTime = 0.0f;
			}
			else 
			{
				b->m_sleepTime += h;
				minSleepTime = b3Min(minSleepTime, b->m_sleepTime);
			}
		}

		// Put the whole island to sleep once the smallest sleep time found
		// among its non-static bodies has reached the threshold.
		if (minSleepTime >= B3_TIME_TO_SLEEP) 
		{
			for (u32 i = 0; i < m_bodyCount; ++i) 
			{
				m_bodies[i]->SetAwake(false);
			}
		}
	}
}