void b3GpuPgsContactSolver::solveContactConstraintBatchSizes(  const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, 
			b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray<int>* batchSizes)//const b3OpenCLArray<int>* gpuBatchSizes)
{
	B3_PROFILE("solveContactConstraintBatchSizes");
	int numBatches = batchSizes->size()/B3_MAX_NUM_BATCHES;
	for(int iter=0; iter<numIterations; iter++)
	{
		
		for (int cellId=0;cellId<numBatches;cellId++)
		{
			int offset = 0;
			for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++)
			{
				int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii);
				if (!numInBatch)
					break;

				{
					b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleContactKernel,"m_solveSingleContactKernel" );
					launcher.setBuffer(bodyBuf->getBufferCL() );
					launcher.setBuffer(shapeBuf->getBufferCL() );
					launcher.setBuffer(	constraint->getBufferCL() );
					launcher.setConst(cellId);
					launcher.setConst(offset);
					launcher.setConst(numInBatch);
					launcher.launch1D(numInBatch);
					offset+=numInBatch;
				}
			}
		}
	}


	for(int iter=0; iter<numIterations; iter++)
	{
		for (int cellId=0;cellId<numBatches;cellId++)
		{
			int offset = 0;
			for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++)
			{
				int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii);
				if (!numInBatch)
					break;

				{
					b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleFrictionKernel,"m_solveSingleFrictionKernel" );
					launcher.setBuffer(bodyBuf->getBufferCL() );
					launcher.setBuffer(shapeBuf->getBufferCL() );
					launcher.setBuffer(	constraint->getBufferCL() );
					launcher.setConst(cellId);
					launcher.setConst(offset);
					launcher.setConst(numInBatch);
					launcher.launch1D(numInBatch);
					offset+=numInBatch;
				}
			}
		}
	}
}
Beispiel #2
0
void TinyRenderObjectData::registerMeshShape(const float* vertices, int numVertices, const int* indices, int numIndices, const float rgbaColor[4],
											 unsigned char* textureImage, int textureWidth, int textureHeight)
{
	if (0 == m_model)
	{
		{
			B3_PROFILE("setColorRGBA");

			m_model = new Model();
			m_model->setColorRGBA(rgbaColor);
		}
		if (textureImage)
		{
			{
				B3_PROFILE("setDiffuseTextureFromData");
				m_model->setDiffuseTextureFromData(textureImage, textureWidth, textureHeight);
			}
		}
		else
		{
			/*char relativeFileName[1024];
			if (b3ResourcePath::findResourcePath("floor_diffuse.tga", relativeFileName, 1024))
			{
				m_model->loadDiffuseTexture(relativeFileName);
			}
             */
		}
		{
			B3_PROFILE("reserveMemory");
			m_model->reserveMemory(numVertices, numIndices);
		}
		{
			B3_PROFILE("addVertex");
			for (int i = 0; i < numVertices; i++)
			{
				m_model->addVertex(vertices[i * 9],
								   vertices[i * 9 + 1],
								   vertices[i * 9 + 2],
								   vertices[i * 9 + 4],
								   vertices[i * 9 + 5],
								   vertices[i * 9 + 6],
								   vertices[i * 9 + 7],
								   vertices[i * 9 + 8]);
			}
		}
		{
			B3_PROFILE("addTriangle");
			for (int i = 0; i < numIndices; i += 3)
			{
				m_model->addTriangle(indices[i], indices[i], indices[i],
									 indices[i + 1], indices[i + 1], indices[i + 1],
									 indices[i + 2], indices[i + 2], indices[i + 2]);
			}
		}
	}
}
///todo: add some acceleration structure (AABBs, tree etc)
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{
	
	//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);

	B3_PROFILE("castRaysGPU");

	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
	b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);

	{
		B3_PROFILE("raycast copyFromHost");
		gpuRays.copyFromHost(rays);

	
		gpuHitResults.resize(hitResults.size());
		gpuHitResults.copyFromHost(hitResults);
	}


	//run kernel
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(gpuRays.getBufferCL());
		launcher.setBuffer(gpuHitResults.getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
		
		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}

	//copy results
	{
		B3_PROFILE("raycast copyToHost");
		gpuHitResults.copyToHost(hitResults);
	}

}
Beispiel #4
0
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
{

//	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);

	B3_PROFILE("castRaysHost");

	for (int r=0;r<rays.size();r++)
	{
		b3Vector3 rayFrom = rays[r].m_from;
		b3Vector3 rayTo = rays[r].m_to;

		//if there is a hit, color the pixels
		bool hits  = false;

		for (int b=0;b<numBodies && !hits;b++)
		{
				
			const b3Vector3& pos = bodies[b].m_pos;
			const b3Quaternion& orn = bodies[b].m_quat;
			
			b3Scalar radius = 1;

			if (sphere_intersect(pos,  radius, rayFrom, rayTo))
				hits = true;
		}
		if (hits)
			hitResults[r].m_hitFraction = 0.f;

	}
}
    // return non-null if there is a status, nullptr otherwise
    virtual const struct SharedMemoryStatus* processServerStatus()
    {
		
		{
			if (btIsExampleBrowserMainThreadTerminated(m_data))
			{
				PhysicsClientSharedMemory::disconnectSharedMemory();
			}
		}
			{	
	   		unsigned long int ms = m_clock.getTimeMilliseconds();
			if (ms>2)
			{ 
				B3_PROFILE("m_clock.reset()");

				btUpdateInProcessExampleBrowserMainThread(m_data);
				m_clock.reset(); 
			}
		}
		{
			b3Clock::usleep(0);
		}
		const SharedMemoryStatus* stat = 0;

		{
			stat = PhysicsClientSharedMemory::processServerStatus();
		}

		return stat;
        
    }
Beispiel #6
0
void ExplicitEuler::integrateExplicitEuler(struct CpuSoftClothDemoInternalData* clothData, char* vertexPositions, int vertexStride,float deltaTime)
{
	B3_PROFILE("integrateEuler");
	b3Vector3 deltaTimeVec = b3MakeVector3(deltaTime,deltaTime,deltaTime,0);

	int numPoints = clothData->m_particleMasses.size();
	
	for (int i=0;i<numPoints;i++)
	{
		float mass = clothData->m_particleMasses[i];
		if (mass)
		{
			


			b3Vector3 dv = (clothData->m_forces[i]/mass)*deltaTimeVec;
			clothData->m_velocities[i]+= dv;
			clothData->m_velocities[i]*=0.999;

			b3Vector3& pos = (b3Vector3&)vertexPositions[i*vertexStride];
			
			pos += clothData->m_velocities[i]*deltaTimeVec;
		}
	}
	
}
Beispiel #7
0
void ConvertURDF2Bullet(
    const URDFImporterInterface& u2b, MultiBodyCreationInterface& creation,
    const btTransform& rootTransformInWorldSpace,
    btMultiBodyDynamicsWorld* world1,
    bool createMultiBody, const char* pathPrefix, int flags, UrdfVisualShapeCache* cachedLinkGraphicsShapes)
{

	URDF2BulletCachedData cache;
    InitURDF2BulletCache(u2b,cache);
    int urdfLinkIndex = u2b.getRootLinkIndex();
	B3_PROFILE("ConvertURDF2Bullet");
	
	UrdfVisualShapeCache cachedLinkGraphicsShapesOut;

	ConvertURDF2BulletInternal(u2b, creation, cache, urdfLinkIndex,rootTransformInWorldSpace,world1,createMultiBody,pathPrefix,flags, cachedLinkGraphicsShapes, &cachedLinkGraphicsShapesOut);
	if (cachedLinkGraphicsShapes && cachedLinkGraphicsShapesOut.m_cachedUrdfLinkVisualShapeIndices.size() > cachedLinkGraphicsShapes->m_cachedUrdfLinkVisualShapeIndices.size())
	{
		*cachedLinkGraphicsShapes = cachedLinkGraphicsShapesOut;
	}

	if (world1 && cache.m_bulletMultiBody)
	{
		B3_PROFILE("Post process");
		btMultiBody* mb = cache.m_bulletMultiBody;

		mb->setHasSelfCollision((flags&CUF_USE_SELF_COLLISION)!=0);
		
		mb->finalizeMultiDof();

		btTransform localInertialFrameRoot = cache.m_urdfLinkLocalInertialFrames[urdfLinkIndex];

		if (flags & CUF_USE_MJCF)
		{
		} else
		{
			mb->setBaseWorldTransform(rootTransformInWorldSpace*localInertialFrameRoot);
		}
		btAlignedObjectArray<btQuaternion> scratch_q;
		btAlignedObjectArray<btVector3> scratch_m;
		mb->forwardKinematics(scratch_q,scratch_m);
		mb->updateCollisionObjectWorldTransforms(scratch_q,scratch_m);
		
		world1->addMultiBody(mb);
	}
}
Beispiel #8
0
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
{
	B3_PROFILE("castRaysGPU");

	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
	gpuRays.copyFromHost(rays);

	b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
	gpuHitResults.resize(hitResults.size());

	b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
	gpuBodies.resize(numBodies);
	gpuBodies.copyFromHostPointer(bodies,numBodies);

	b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
	gpuCollidables.resize(numCollidables);
	gpuCollidables.copyFromHostPointer(collidables,numCollidables);


	//run kernel
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel);
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(gpuRays.getBufferCL());
		launcher.setBuffer(gpuHitResults.getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(gpuBodies.getBufferCL());
		launcher.setBuffer(gpuCollidables.getBufferCL());

		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}

	//copy results
	gpuHitResults.copyToHost(hitResults);

}
void OpenGLExampleBrowser::updateGraphics()
{
	if (sCurrentDemo)
	{
			if (!pauseSimulation || singleStepSimulation)
			{
				B3_PROFILE("sCurrentDemo->updateGraphics");
				sCurrentDemo->updateGraphics();
			}
	}
}
Beispiel #10
0
void RigidBodyDemo::renderScene()
{
	{
		B3_PROFILE("writeSingleInstanceTransformToCPU");
		const b3RigidBodyData* bodies = m_rb->getBodyBuffer();
		//sync transforms
		int numBodies = m_rb->getNumBodies();
		for (int i=0;i<numBodies;i++)
		{
			m_instancingRenderer->writeSingleInstanceTransformToCPU(&bodies[i].m_pos.x,bodies[i].m_quat,i);
		}
	}
	{
		B3_PROFILE("writeTransforms");
		m_instancingRenderer->writeTransforms();
	}
	{
		B3_PROFILE("renderScene");
		m_instancingRenderer->renderScene();
	}
}
/// b3PgsJacobiSolver Sequentially applies impulses
b3Scalar b3GpuPgsConstraintSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias,
											  int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
{
	B3_PROFILE("solveJoints");
	//you need to provide at least some bodies

	solveGroupCacheFriendlySetup(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal);

	solveGroupCacheFriendlyIterations(gpuConstraints, numConstraints, infoGlobal);

	solveGroupCacheFriendlyFinish(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal);

	return 0.f;
}
Beispiel #12
0
void ExplicitEuler::computeGravityForces(struct CpuSoftClothDemoInternalData* clothData, char* vertexPositions, int vertexStride, float dt)
{
	
	B3_PROFILE("computeForces");
	int numPoints = clothData->m_particleMasses.size();

	b3Vector3 gravityAcceleration = b3MakeVector3(0,-9.8,0);
	//f=m*a
	for (int i=0;i<numPoints;i++)
	{
		{
			float	particleMass = clothData->m_particleMasses[i];

			b3Vector3 particleMassVec = b3MakeVector3(particleMass,particleMass,particleMass,0);
			clothData->m_forces[i] = gravityAcceleration*particleMass;
		}
	}

}
Beispiel #13
0
void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyCL>* bodyBuf,
                                     const b3OpenCLArray<b3InertiaCL>* shapeBuf,
                                     b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
                                     int nContacts, const ConstraintCfg& cfg )
{
    b3OpenCLArray<b3GpuConstraint4>* constraintNative =0;

    struct CB
    {
        int m_nContacts;
        float m_dt;
        float m_positionDrift;
        float m_positionConstraintCoeff;
    };

    {
        B3_PROFILE("m_contactToConstraintKernel");
        CB cdata;
        cdata.m_nContacts = nContacts;
        cdata.m_dt = cfg.m_dt;
        cdata.m_positionDrift = cfg.m_positionDrift;
        cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;


        b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()),
                                   b3BufferInfoCL( contactCOut->getBufferCL() )
                                 };
        b3LauncherCL launcher( m_queue, m_contactToConstraintKernel );
        launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
        //launcher.setConst(  cdata );

        launcher.setConst(cdata.m_nContacts);
        launcher.setConst(cdata.m_dt);
        launcher.setConst(cdata.m_positionDrift);
        launcher.setConst(cdata.m_positionConstraintCoeff);

        launcher.launch1D( nContacts, 64 );
        clFinish(m_queue);

    }

    contactCOut->resize(nContacts);
}
inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3(b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies)
{
	//int sz = sizeof(b3BatchConstraint);

	B3_PROFILE("sortConstraintByBatch3");

	static int maxSwaps = 0;
	int numSwaps = 0;

	curUsed.resize(2 * simdWidth);

	static int maxNumConstraints = 0;
	if (maxNumConstraints < numConstraints)
	{
		maxNumConstraints = numConstraints;
		//printf("maxNumConstraints  = %d\n",maxNumConstraints );
	}

	int numUsedArray = numBodies / 32 + 1;
	bodyUsed.resize(numUsedArray);

	for (int q = 0; q < numUsedArray; q++)
		bodyUsed[q] = 0;

	int curBodyUsed = 0;

	int numIter = 0;

#if defined(_DEBUG)
	for (int i = 0; i < numConstraints; i++)
		cs[i].m_batchId = -1;
#endif

	int numValidConstraints = 0;
	//	int unprocessedConstraintIndex = 0;

	int batchIdx = 0;

	{
		B3_PROFILE("cpu batch innerloop");

		while (numValidConstraints < numConstraints)
		{
			numIter++;
			int nCurrentBatch = 0;
			//	clear flag
			for (int i = 0; i < curBodyUsed; i++)
				bodyUsed[curUsed[i] / 32] = 0;

			curBodyUsed = 0;

			for (int i = numValidConstraints; i < numConstraints; i++)
			{
				int idx = i;
				b3Assert(idx < numConstraints);
				//	check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx;
				bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx;
				int aUnavailable = 0;
				int bUnavailable = 0;
				if (!aIsStatic)
				{
					aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31));
				}
				if (!aUnavailable)
					if (!bIsStatic)
					{
						bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 31));
					}

				if (aUnavailable == 0 && bUnavailable == 0)  // ok
				{
					if (!aIsStatic)
					{
						bodyUsed[bodyA / 32] |= (1 << (bodyA & 31));
						curUsed[curBodyUsed++] = bodyA;
					}
					if (!bIsStatic)
					{
						bodyUsed[bodyB / 32] |= (1 << (bodyB & 31));
						curUsed[curBodyUsed++] = bodyB;
					}

					cs[idx].m_batchId = batchIdx;

					if (i != numValidConstraints)
					{
						b3Swap(cs[i], cs[numValidConstraints]);
						numSwaps++;
					}

					numValidConstraints++;
					{
						nCurrentBatch++;
						if (nCurrentBatch == simdWidth)
						{
							nCurrentBatch = 0;
							for (int i = 0; i < curBodyUsed; i++)
								bodyUsed[curUsed[i] / 32] = 0;
							curBodyUsed = 0;
						}
					}
				}
			}
			m_gpuData->m_batchSizes.push_back(nCurrentBatch);
			batchIdx++;
		}
	}

#if defined(_DEBUG)
	//		debugPrintf( "nBatches: %d\n", batchIdx );
	for (int i = 0; i < numConstraints; i++)
	{
		b3Assert(cs[i].m_batchId != -1);
	}
#endif

	if (maxSwaps < numSwaps)
	{
		maxSwaps = numSwaps;
		//printf("maxSwaps = %d\n", maxSwaps);
	}

	return batchIdx;
}
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal)
{
	//only create the batches once.
	//@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated
	B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");

	bool createBatches = m_gpuData->m_batchSizes.size() == 0;
	{
		if (createBatches)
		{
			m_gpuData->m_batchSizes.resize(0);

			{
				m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);

				B3_PROFILE("batch joints");
				b3Assert(batchConstraints.size() == numConstraints);
				int simdWidth = numConstraints + 1;
				int numBodies = m_tmpSolverBodyPool.size();
				sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies);

				m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
			}
		}
		else
		{
			/*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches;
			m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches);
			b3Assert(cpuCheckBatches.size()==batchConstraints.size());
			printf(".\n");
			*/
			//>copyFromHost(batchConstraints);
		}
		int maxIterations = infoGlobal.m_numIterations;

		bool useBatching = true;

		if (useBatching)
		{
			if (!useGpuSolveJointConstraintRows)
			{
				B3_PROFILE("copy to host");
				m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
				m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
				m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
				m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1);
				m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets);
				gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints);
			}

			for (int iteration = 0; iteration < maxIterations; iteration++)
			{
				int batchOffset = 0;
				int constraintOffset = 0;
				int numBatches = m_gpuData->m_batchSizes.size();
				for (int bb = 0; bb < numBatches; bb++)
				{
					int numConstraintsInBatch = m_gpuData->m_batchSizes[bb];

					if (useGpuSolveJointConstraintRows)
					{
						B3_PROFILE("solveJointConstraintRowsKernels");

						/*
						__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,
					  __global b3BatchConstraint* batchConstraints,
					  	__global b3SolverConstraint* rows,
						__global unsigned int* numConstraintRowsInfo1, 
						__global unsigned int* rowOffsets,
						__global b3GpuGenericConstraint* constraints,
						int batchOffset,
						int numConstraintsInBatch*/

						b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels");
						launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
						launcher.setBuffer(gpuConstraints1->getBufferCL());  //to detect disabled constraints
						launcher.setConst(batchOffset);
						launcher.setConst(numConstraintsInBatch);

						launcher.launch1D(numConstraintsInBatch);
					}
					else  //useGpu
					{
						for (int b = 0; b < numConstraintsInBatch; b++)
						{
							const b3BatchConstraint& c = batchConstraints[batchOffset + b];
							/*printf("-----------\n");
							printf("bb=%d\n",bb);
							printf("c.batchId = %d\n", c.m_batchId);
							*/
							b3Assert(c.m_batchId == bb);
							b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex];
							if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED)
							{
								int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex];
								int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex];

								for (int jj = 0; jj < numConstraintRows; jj++)
								{
									//
									b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj];
									//resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
									resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint);
								}
							}
						}
					}  //useGpu
					batchOffset += numConstraintsInBatch;
					constraintOffset += numConstraintsInBatch;
				}
			}  //for (int iteration...

			if (!useGpuSolveJointConstraintRows)
			{
				{
					B3_PROFILE("copy from host");
					m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool);
					m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
					m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool);
				}

				//B3_PROFILE("copy to host");
				//m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
			}
			//int sz = sizeof(b3GpuSolverBody);
			//printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz);
		}
		else
		{
			for (int iteration = 0; iteration < maxIterations; iteration++)
			{
				int numJoints = m_tmpSolverNonContactConstraintPool.size();
				for (int j = 0; j < numJoints; j++)
				{
					b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j];
					resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint);
				}

				if (!m_usePgs)
				{
					averageVelocities();
				}
			}
		}
	}
	clFinish(m_gpuData->m_queue);
	return 0.f;
}
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
{
	B3_PROFILE("GPU solveGroupCacheFriendlySetup");
	batchConstraints.resize(numConstraints);
	m_gpuData->m_gpuBatchConstraints->resize(numConstraints);
	m_staticIdx = -1;
	m_maxOverrideNumSolverIterations = 0;

	/*	m_gpuData->m_gpuBodies->resize(numBodies);
	m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies);

	b3OpenCLArray<b3InertiaData> gpuInertias(m_gpuData->m_context,m_gpuData->m_queue);
	gpuInertias.resize(numBodies);
	gpuInertias.copyFromHostPointer(inertias,numBodies);
	*/

	m_gpuData->m_gpuSolverBodies->resize(numBodies);

	m_tmpSolverBodyPool.resize(numBodies);
	{
		if (useGpuInitSolverBodies)
		{
			B3_PROFILE("m_initSolverBodiesKernel");

			b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initSolverBodiesKernel, "m_initSolverBodiesKernel");
			launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
			launcher.setBuffer(gpuBodies->getBufferCL());
			launcher.setConst(numBodies);
			launcher.launch1D(numBodies);
			clFinish(m_gpuData->m_queue);

			//			m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
		}
		else
		{
			gpuBodies->copyToHost(m_gpuData->m_cpuBodies);
			for (int i = 0; i < numBodies; i++)
			{
				b3RigidBodyData& body = m_gpuData->m_cpuBodies[i];
				b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i];
				initSolverBody(i, &solverBody, &body);
				solverBody.m_originalBodyIndex = i;
			}
			m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool);
		}
	}

	//	int totalBodies = 0;
	int totalNumRows = 0;
	//b3RigidBody* rb0=0,*rb1=0;
	//if (1)
	{
		{
			//			int i;

			m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);

			//			b3OpenCLArray<b3GpuGenericConstraint> gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue);

			if (useGpuInfo1)
			{
				B3_PROFILE("info1 and init batchConstraint");

				m_gpuData->m_gpuConstraintInfo1->resize(numConstraints);

				if (1)
				{
					B3_PROFILE("getInfo1Kernel");

					b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo1Kernel, "m_getInfo1Kernel");
					launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
					launcher.setBuffer(gpuConstraints->getBufferCL());
					launcher.setConst(numConstraints);
					launcher.launch1D(numConstraints);
					clFinish(m_gpuData->m_queue);
				}

				if (m_gpuData->m_batchSizes.size() == 0)
				{
					B3_PROFILE("initBatchConstraintsKernel");

					m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints);
					unsigned int total = 0;
					m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1, *m_gpuData->m_gpuConstraintRowOffsets, numConstraints, &total);
					unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints - 1);
					totalNumRows = total + lastElem;

					{
						B3_PROFILE("init batch constraints");
						b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initBatchConstraintsKernel, "m_initBatchConstraintsKernel");
						launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
						launcher.setBuffer(gpuConstraints->getBufferCL());
						launcher.setBuffer(gpuBodies->getBufferCL());
						launcher.setConst(numConstraints);
						launcher.launch1D(numConstraints);
						clFinish(m_gpuData->m_queue);
					}
					//assume the batching happens on CPU, so copy the data
					m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
				}
			}
			else
			{
				totalNumRows = 0;
				gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints);
				//calculate the total number of contraint rows
				for (int i = 0; i < numConstraints; i++)
				{
					unsigned int& info1 = m_tmpConstraintSizesPool[i];
					//					unsigned int info1;
					if (m_gpuData->m_cpuConstraints[i].isEnabled())
					{
						m_gpuData->m_cpuConstraints[i].getInfo1(&info1, &m_gpuData->m_cpuBodies[0]);
					}
					else
					{
						info1 = 0;
					}

					totalNumRows += info1;
				}

				m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
				m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool);
			}
			m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
			m_gpuData->m_gpuConstraintRows->resize(totalNumRows);

			//			b3GpuConstraintArray		verify;

			if (useGpuInfo2)
			{
				{
					B3_PROFILE("getInfo2Kernel");
					b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo2Kernel, "m_getInfo2Kernel");
					launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
					launcher.setBuffer(gpuConstraints->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
					launcher.setBuffer(gpuBodies->getBufferCL());
					launcher.setBuffer(gpuInertias->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
					launcher.setConst(infoGlobal.m_timeStep);
					launcher.setConst(infoGlobal.m_erp);
					launcher.setConst(infoGlobal.m_globalCfm);
					launcher.setConst(infoGlobal.m_damping);
					launcher.setConst(infoGlobal.m_numIterations);
					launcher.setConst(numConstraints);
					launcher.launch1D(numConstraints);
					clFinish(m_gpuData->m_queue);

					if (m_gpuData->m_batchSizes.size() == 0)
						m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
					//m_gpuData->m_gpuConstraintRows->copyToHost(verify);
					//m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
				}
			}
			else
			{
				gpuInertias->copyToHost(m_gpuData->m_cpuInertias);

				///setup the b3SolverConstraints

				for (int i = 0; i < numConstraints; i++)
				{
					const int& info1 = m_tmpConstraintSizesPool[i];

					if (info1)
					{
						int constraintIndex = batchConstraints[i].m_originalConstraintIndex;
						int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[constraintIndex];

						b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset];
						b3GpuGenericConstraint& constraint = m_gpuData->m_cpuConstraints[i];

						b3RigidBodyData& rbA = m_gpuData->m_cpuBodies[constraint.getRigidBodyA()];
						//b3RigidBody& rbA = constraint.getRigidBodyA();
						//				b3RigidBody& rbB = constraint.getRigidBodyB();
						b3RigidBodyData& rbB = m_gpuData->m_cpuBodies[constraint.getRigidBodyB()];

						int solverBodyIdA = constraint.getRigidBodyA();  //getOrInitSolverBody(constraint.getRigidBodyA(),bodies,inertias);
						int solverBodyIdB = constraint.getRigidBodyB();  //getOrInitSolverBody(constraint.getRigidBodyB(),bodies,inertias);

						b3GpuSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
						b3GpuSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];

						if (rbA.m_invMass)
						{
							batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;
						}
						else
						{
							if (!solverBodyIdA)
								m_staticIdx = 0;
							batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;
						}

						if (rbB.m_invMass)
						{
							batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;
						}
						else
						{
							if (!solverBodyIdB)
								m_staticIdx = 0;
							batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;
						}

						int overrideNumSolverIterations = 0;  //constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
						if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations)
							m_maxOverrideNumSolverIterations = overrideNumSolverIterations;

						int j;
						for (j = 0; j < info1; j++)
						{
							memset(&currentConstraintRow[j], 0, sizeof(b3GpuSolverConstraint));
							currentConstraintRow[j].m_angularComponentA.setValue(0, 0, 0);
							currentConstraintRow[j].m_angularComponentB.setValue(0, 0, 0);
							currentConstraintRow[j].m_appliedImpulse = 0.f;
							currentConstraintRow[j].m_appliedPushImpulse = 0.f;
							currentConstraintRow[j].m_cfm = 0.f;
							currentConstraintRow[j].m_contactNormal.setValue(0, 0, 0);
							currentConstraintRow[j].m_friction = 0.f;
							currentConstraintRow[j].m_frictionIndex = 0;
							currentConstraintRow[j].m_jacDiagABInv = 0.f;
							currentConstraintRow[j].m_lowerLimit = 0.f;
							currentConstraintRow[j].m_upperLimit = 0.f;

							currentConstraintRow[j].m_originalContactPoint = 0;
							currentConstraintRow[j].m_overrideNumSolverIterations = 0;
							currentConstraintRow[j].m_relpos1CrossNormal.setValue(0, 0, 0);
							currentConstraintRow[j].m_relpos2CrossNormal.setValue(0, 0, 0);
							currentConstraintRow[j].m_rhs = 0.f;
							currentConstraintRow[j].m_rhsPenetration = 0.f;
							currentConstraintRow[j].m_solverBodyIdA = 0;
							currentConstraintRow[j].m_solverBodyIdB = 0;

							currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;
							currentConstraintRow[j].m_upperLimit = B3_INFINITY;
							currentConstraintRow[j].m_appliedImpulse = 0.f;
							currentConstraintRow[j].m_appliedPushImpulse = 0.f;
							currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
							currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
							currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
						}

						bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f);
						bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f);
						bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f);
						bodyAPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f);

						b3GpuConstraintInfo2 info2;
						info2.fps = 1.f / infoGlobal.m_timeStep;
						info2.erp = infoGlobal.m_erp;
						info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
						info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
						info2.m_J2linearAxis = 0;
						info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
						info2.rowskip = sizeof(b3GpuSolverConstraint) / sizeof(b3Scalar);  //check this
						///the size of b3GpuSolverConstraint needs be a multiple of b3Scalar
						b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3GpuSolverConstraint));
						info2.m_constraintError = &currentConstraintRow->m_rhs;
						currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
						info2.m_damping = infoGlobal.m_damping;
						info2.cfm = &currentConstraintRow->m_cfm;
						info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
						info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
						info2.m_numIterations = infoGlobal.m_numIterations;
						m_gpuData->m_cpuConstraints[i].getInfo2(&info2, &m_gpuData->m_cpuBodies[0]);

						///finalize the constraint setup
						for (j = 0; j < info1; j++)
						{
							b3GpuSolverConstraint& solverConstraint = currentConstraintRow[j];

							if (solverConstraint.m_upperLimit >= m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold())
							{
								solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold();
							}

							if (solverConstraint.m_lowerLimit <= -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold())
							{
								solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold();
							}

							//						solverConstraint.m_originalContactPoint = constraint;

							b3Matrix3x3& invInertiaWorldA = m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld;
							{
								//b3Vector3 angularFactorA(1,1,1);
								const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
								solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1;  //*angularFactorA;
							}

							b3Matrix3x3& invInertiaWorldB = m_gpuData->m_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld;
							{
								const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
								solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2;  //*constraint.getRigidBodyB().getAngularFactor();
							}

							{
								//it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal
								//because it gets multiplied iMJlB
								b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass;
								b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal;
								b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass;  //sign of normal?
								b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal;

								b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
								sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
								sum += iMJlB.dot(solverConstraint.m_contactNormal);
								sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
								b3Scalar fsum = b3Fabs(sum);
								b3Assert(fsum > B3_EPSILON);
								solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) / sum : 0.f;
							}

							///fix rhs
							///todo: add force/torque accelerators
							{
								b3Scalar rel_vel;
								b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel);
								b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel);

								rel_vel = vel1Dotn + vel2Dotn;

								b3Scalar restitution = 0.f;
								b3Scalar positionalError = solverConstraint.m_rhs;  //already filled in by getConstraintInfo2
								b3Scalar velocityError = restitution - rel_vel * info2.m_damping;
								b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv;
								b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv;
								solverConstraint.m_rhs = penetrationImpulse + velocityImpulse;
								solverConstraint.m_appliedImpulse = 0.f;
							}
						}
					}
				}

				m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool);
				m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool);

				if (m_gpuData->m_batchSizes.size() == 0)
					m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
				else
					m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);

				m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool);

			}  //end useGpuInfo2
		}

#ifdef B3_SUPPORT_CONTACT_CONSTRAINTS
		{
			int i;

			for (i = 0; i < numManifolds; i++)
			{
				b3Contact4& manifold = manifoldPtr[i];
				convertContact(bodies, inertias, &manifold, infoGlobal);
			}
		}
#endif  //B3_SUPPORT_CONTACT_CONSTRAINTS
	}

	//	b3ContactSolverInfo info = infoGlobal;

	//	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
	//	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
	//	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();

	return 0.f;
}
Beispiel #17
0
void GLInstancingRenderer::init()
{
	b3Assert(glGetError() ==GL_NO_ERROR);


	glEnable(GL_DEPTH_TEST);
	glDepthFunc(GL_LESS);

    b3Assert(glGetError() ==GL_NO_ERROR);

//	glClearColor(float(0.),float(0.),float(0.4),float(0));

    b3Assert(glGetError() ==GL_NO_ERROR);


    b3Assert(glGetError() ==GL_NO_ERROR);

	{
		B3_PROFILE("texture");
		if(m_textureenabled)
		{
			if(!m_textureinitialized)
			{
				glActiveTexture(GL_TEXTURE0);

				GLubyte*	image=new GLubyte[256*256*3];
				for(int y=0;y<256;++y)
				{
//					const int	t=y>>5;
					GLubyte*	pi=image+y*256*3;
					for(int x=0;x<256;++x)
					{
						if (x<2||y<2||x>253||y>253)
						{
							pi[0]=255;//0;
							pi[1]=255;//0;
							pi[2]=255;//0;
						} else
						{
							pi[0]=255;
							pi[1]=255;
							pi[2]=255;
						}

						/*
						const int		s=x>>5;
						const GLubyte	b=180;
						GLubyte			c=b+((s+t&1)&1)*(255-b);
						pi[0]=c;
						pi[1]=c;
						pi[2]=c;
						*/

						pi+=3;
					}
				}

				glGenTextures(1,(GLuint*)&m_data->m_defaultTexturehandle);
				glBindTexture(GL_TEXTURE_2D,m_data->m_defaultTexturehandle);
				b3Assert(glGetError() ==GL_NO_ERROR);

				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 256,256,0,GL_RGB,GL_UNSIGNED_BYTE,image);
				glGenerateMipmap(GL_TEXTURE_2D);

				b3Assert(glGetError() ==GL_NO_ERROR);

				delete[] image;
				m_textureinitialized=true;
			}

			b3Assert(glGetError() ==GL_NO_ERROR);

			glBindTexture(GL_TEXTURE_2D,m_data->m_defaultTexturehandle);
			b3Assert(glGetError() ==GL_NO_ERROR);


		} else
		{
			glDisable(GL_TEXTURE_2D);
			b3Assert(glGetError() ==GL_NO_ERROR);
		}
	}
	//glEnable(GL_COLOR_MATERIAL);

	b3Assert(glGetError() ==GL_NO_ERROR);

	//	  glEnable(GL_CULL_FACE);
	//	  glCullFace(GL_BACK);
}
Beispiel #18
0
void GLInstancingRenderer::renderSceneInternal(int renderMode)
{

//	glEnable(GL_DEPTH_TEST);

	GLint dims[4];
        glGetIntegerv(GL_VIEWPORT, dims);
	//we need to get the viewport dims, because on Apple Retina the viewport dimension is different from screenWidth
	//printf("dims=%d,%d,%d,%d\n",dims[0],dims[1],dims[2],dims[3]);
	// Accept fragment if it closer to the camera than the former one
	//glDepthFunc(GL_LESS);

	// Cull triangles which normal is not towards the camera
	glEnable(GL_CULL_FACE);



	 B3_PROFILE("GLInstancingRenderer::RenderScene");

	 {
		B3_PROFILE("init");
		init();
	 }


    b3Assert(glGetError() ==GL_NO_ERROR);

	float depthProjectionMatrix[4][4];
	GLfloat depthModelViewMatrix[4][4];
	//GLfloat depthModelViewMatrix2[4][4];

	// Compute the MVP matrix from the light's point of view
	if (renderMode==B3_CREATE_SHADOWMAP_RENDERMODE)
	{
		glEnable(GL_CULL_FACE);
		glCullFace(GL_FRONT);

		if (!m_data->m_shadowMap)
		{
			glActiveTexture(GL_TEXTURE0);

			glGenTextures(1,&m_data->m_shadowTexture);
			glBindTexture(GL_TEXTURE_2D,m_data->m_shadowTexture);
			//glTexImage2D(GL_TEXTURE_2D,0,GL_DEPTH_COMPONENT16,m_screenWidth,m_screenHeight,0,GL_DEPTH_COMPONENT,GL_FLOAT,0);
			//glTexImage2D(GL_TEXTURE_2D,0,GL_DEPTH_COMPONENT32,m_screenWidth,m_screenHeight,0,GL_DEPTH_COMPONENT,GL_FLOAT,0);

#ifdef OLD_SHADOWMAP_INIT
			glTexImage2D(GL_TEXTURE_2D, 0,GL_DEPTH_COMPONENT16, shadowMapWidth, shadowMapHeight, 0,GL_DEPTH_COMPONENT, GL_FLOAT, 0);
#else//OLD_SHADOWMAP_INIT
			//Reduce size of shadowMap if glTexImage2D call fails as may happen in some cases
			//https://github.com/bulletphysics/bullet3/issues/40
			
			int size;
			glGetIntegerv(GL_MAX_TEXTURE_SIZE, &size);
			if (size < shadowMapWidth){
				shadowMapWidth = size;
			}
			if (size < shadowMapHeight){
				shadowMapHeight = size;
			}
			GLuint err;
			do {
				glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, 
					shadowMapWidth, shadowMapHeight, 
					0, GL_DEPTH_COMPONENT, GL_FLOAT, 0);
				err = glGetError();
				if (err!=GL_NO_ERROR){
					shadowMapHeight >>= 1;
					shadowMapWidth >>= 1;
				}
			} while (err != GL_NO_ERROR && shadowMapWidth > 0);
#endif//OLD_SHADOWMAP_INIT

			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

			float l_ClampColor[] = {1.0, 1.0, 1.0, 1.0};
			glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, l_ClampColor);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
//			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
//			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL);
			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_COMPARE_REF_TO_TEXTURE);

			m_data->m_shadowMap=new GLRenderToTexture();
			m_data->m_shadowMap->init(shadowMapWidth, shadowMapHeight,m_data->m_shadowTexture,RENDERTEXTURE_DEPTH);
		}
Beispiel #19
0
///todo: add some acceleration structure (AABBs, tree etc)
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, 
		const struct b3GpuNarrowPhaseInternalData* narrowphaseData,	class b3GpuBroadphaseInterface* broadphase)
{
	//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);

	B3_PROFILE("castRaysGPU");
	
	{
		B3_PROFILE("raycast copyFromHost");
		m_data->m_gpuRays->copyFromHost(rays);
		m_data->m_gpuHitResults->copyFromHost(hitResults);
		
	}
	
	int numRays = hitResults.size();
	{
		m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
		m_data->m_numRayRigidPairsPerRay->resize(numRays);
		
		m_data->m_gpuNumRayRigidPairs->resize(1);
		m_data->m_gpuRayRigidPairs->resize(numRays * 16);
	}
	
	//run kernel
	const bool USE_BRUTE_FORCE_RAYCAST = false;
	if(USE_BRUTE_FORCE_RAYCAST)
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
		launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
		
		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}
	else
	{
		m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() );

		m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
		
		int numRayRigidPairs = -1;
		m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
		if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() )
		{
			numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
			m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
		}
		
		m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs);	//Radix sort needs b3OpenCLArray::size() to be correct
		
		//Sort ray-rigid pairs by ray index
		{
			B3_PROFILE("sort ray-rigid pairs");
			m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) );
		}
		
		//detect start,count of each ray pair
		{
			B3_PROFILE("detect ray-rigid pair index ranges");
			
			{
				B3_PROFILE("reset ray-rigid pair index ranges");
				
				m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays);	//atomic_min used to find first index
				m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
				clFinish(m_data->m_q);
			}
			
			b3BufferInfoCL bufferInfo[] = 
			{
				b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ),
				
				b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
				b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() )
			};
			
			b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
			launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(numRayRigidPairs);
			
			launcher.launch1D(numRayRigidPairs);
			clFinish(m_data->m_q);
		}
		
		{
			B3_PROFILE("ray-rigid intersection");
			
			b3BufferInfoCL bufferInfo[] = 
			{
				b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ),
				b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ),
				b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
				b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ),
				
				b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ),
				
				b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() )
			};
			
			b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
			launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(numRays);
			
			launcher.launch1D(numRays);
			clFinish(m_data->m_q);
		}
	}
	
	

	//copy results
	{
		B3_PROFILE("raycast copyToHost");
		m_data->m_gpuHitResults->copyToHost(hitResults);
	}

}
Beispiel #20
0
int main(int argc, char* argv[])
{

	int sz = sizeof(b3Generic6DofConstraint);
	int sz2 = sizeof(b3Point2PointConstraint);
	int sz3 = sizeof(b3TypedConstraint);
	int sz4 = sizeof(b3TranslationalLimitMotor);
	int sz5 = sizeof(b3RotationalLimitMotor);
	int sz6 = sizeof(b3Transform);

	//b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache");


	b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile);
	b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile);


	b3SetCustomPrintfFunc(myprintf);
	b3Vector3 test=b3MakeVector3(1,2,3);
	test.x = 1;
	test.y = 4;

    printf("main start");

	b3CommandLineArgs args(argc,argv);
	ParticleDemo::ConstructionInfo ci;

	if (args.CheckCmdLineFlag("help"))
	{
		Usage();
		return 0;
	}

	selectedDemo =  loadCurrentDemoEntry(sStartFileName);


	args.GetCmdLineArgument("selected_demo",selectedDemo);


	if (args.CheckCmdLineFlag("new_batching"))
	{
		useNewBatchingKernel = true;
	}
	bool benchmark=args.CheckCmdLineFlag("benchmark");
	args.GetCmdLineArgument("max_framecount",maxFrameCount);

	args.GetCmdLineArgument("shadowmap_size",shadowMapWorldSize);

	args.GetCmdLineArgument("shadowmap_resolution",shadowMapWidth);
	shadowMapHeight=shadowMapWidth;
	if (args.CheckCmdLineFlag("disable_shadowmap"))
	{
		useShadowMap = false;
	}

	args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName);

	gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch");

	dump_timings=args.CheckCmdLineFlag("dump_timings");
	ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl");
	ci.m_useConcaveMesh = true;//args.CheckCmdLineFlag("use_concave_mesh");
	if (ci.m_useConcaveMesh)
	{
		enableExperimentalCpuConcaveCollision = true;
	}
	ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes");
	args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex);
	args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex);
	gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu");
	gUseLargeBatches = args.CheckCmdLineFlag("use_large_batches");

	gUseJacobi = args.CheckCmdLineFlag("use_jacobi");
	gUseDbvt = args.CheckCmdLineFlag("use_dbvt");
	gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats");
	gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu");
	gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu");
	gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu");
	gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu");
	useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid");




	args.GetCmdLineArgument("x_dim", ci.arraySizeX);
	args.GetCmdLineArgument("y_dim", ci.arraySizeY);
	args.GetCmdLineArgument("z_dim", ci.arraySizeZ);
	args.GetCmdLineArgument("x_gap", ci.gapX);
	args.GetCmdLineArgument("y_gap", ci.gapY);
	args.GetCmdLineArgument("z_gap", ci.gapZ);

	gPause = args.CheckCmdLineFlag("paused");

	gDebugForceLoadingFromSource = args.CheckCmdLineFlag("load_cl_kernels_from_disk");
	gDebugSkipLoadingBinary = args.CheckCmdLineFlag("disable_cached_cl_kernels");


#ifndef B3_NO_PROFILE
	b3ProfileManager::Reset();
#endif //B3_NO_PROFILE


	window = new b3gDefaultOpenGLWindow();

	b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight);

	window->createWindow(wci);
	window->setResizeCallback(MyResizeCallback);
	window->setMouseMoveCallback(MyMouseMoveCallback);
	window->setMouseButtonCallback(MyMouseButtonCallback);
	window->setKeyboardCallback(MyKeyboardCallback);

	window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org");
	printf("-----------------------------------------------------\n");




#ifndef __APPLE__
	glewInit();
#endif

	gui = new GwenUserInterface();

    printf("started GwenUserInterface");


	GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight);

	stash = initFont(&prim);


	if (gui)
	{
		gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale());

		printf("init fonts");


		gui->setToggleButtonCallback(MyButtonCallback);

		gui->registerToggleButton(MYPAUSE,"Pause");
		gui->registerToggleButton(MYPROFILE,"Profile");
		gui->registerToggleButton(MYRESET,"Reset");

		int numItems = sizeof(allDemos)/sizeof(ParticleDemo::CreateFunc*);
		demoNames.clear();
		for (int i=0;i<numItems;i++)
		{
			GpuDemo* demo = allDemos[i]();
			demoNames.push_back(demo->getName());
			delete demo;
		}

		gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0],selectedDemo);
		gui->setComboBoxCallback(MyComboBoxCallback);
	}



	do
	{
		bool syncOnly = false;
		gReset = false;

			{
		GLint err;
		glEnable(GL_BLEND);
		err = glGetError();
		b3Assert(err==GL_NO_ERROR);
		glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
		glDisable(GL_DEPTH_TEST);
		err = glGetError();
		b3Assert(err==GL_NO_ERROR);
		window->startRendering();
		glClearColor(1,1,1,1);
		glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
		glEnable(GL_DEPTH_TEST);

		sth_begin_draw(stash);
		//sth_draw_text(stash, droidRegular,12.f, dx, dy-50, "How does this OpenGL True Type font look? ", &dx,width,height);
		int spacing = 0;//g_OpenGLHeight;
		float sx,sy,dx,dy,lh;
		sx = 0;
		sy = g_OpenGLHeight;
		dx = sx; dy = sy;
		//if (1)
		const char* msg[] = {"Please wait, initializing the OpenCL demo",
			"Please make sure to run the demo on a high-end discrete GPU with OpenCL support",
			"The first time it can take a bit longer to compile the OpenCL kernels.",
			"Check the console if it takes longer than 1 minute or if a demos has issues.",
			"Please share the full commandline output when reporting issues:",
			"App_Bullet3_OpenCL_Demos_* >> error.log",

			"",
			"",
#ifdef _DEBUG
			"Some of the demos load a large .obj file,",
			"please use an optimized build of this app for faster parsing",

			"",
			"",
#endif
			"You can press F1 to create a single screenshot,",
			"or press F2 toggle screenshot (useful to create movies)",
			"",
			"",
			"There are various command-line options such as --benchmark",
			"See http://github.com/erwincoumans/bullet3 for more information"
		};
		int fontSize = 68;

		int nummsg = sizeof(msg)/sizeof(const char*);
		for (int i=0;i<nummsg;i++)
		{
			char txt[512];
			sprintf(txt,"%s",msg[i]);
				//sth_draw_text(stash, droidRegular,i, 10, dy-spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight);
				sth_draw_text(stash, droidRegular,fontSize, 10, spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight);
				spacing+=fontSize;
			fontSize = 32;
		}

		sth_end_draw(stash);
		sth_flush_draw(stash);
		window->endRendering();
	}



	static bool once=true;


	//glClearColor(0.3f, 0.3f, 0.3f, 1.0f);
	glClearColor(1,1,1,1);
	glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);

	window->setWheelCallback(b3DefaultWheelCallback);




	{
		GpuDemo* demo = allDemos[selectedDemo]();
		sDemo = demo;
//		demo->myinit();
		bool useGpu = false;


		//int maxObjectCapacity=128*1024;
		int maxObjectCapacity=1024*1024;
		maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10);

		{
		ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity);//render.getInstancingRenderer();
		ci.m_window = window;
		ci.m_gui = gui;
		ci.m_instancingRenderer->init();
		ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
		ci.m_instancingRenderer->InitShaders();
		ci.m_primRenderer = &prim;
//		render.init();
		}

		{
			demo->initPhysics(ci);
		}





		printf("-----------------------------------------------------\n");

		FILE* csvFile = 0;
		FILE* detailsFile = 0;

		if (benchmark)
		{

			char prefixFileName[1024];
			char csvFileName[1024];
			char detailsFileName[1024];

			b3OpenCLDeviceInfo info;
			b3OpenCLUtils::getDeviceInfo(demo->getInternalData()->m_clDevice,&info);

			//todo: move this time stuff into the Platform/Window class
#ifdef _WIN32
			SYSTEMTIME time;
			GetLocalTime(&time);
			char buf[1024];
			DWORD dwCompNameLen = 1024;
			if (0 != GetComputerName(buf, &dwCompNameLen))
			{
				printf("%s", buf);
			} else
			{
				printf("unknown", buf);
			}

			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond);

#else
			timeval now;
			gettimeofday(&now,0);

			struct tm* ptm;
			ptm = localtime (&now.tv_sec);
			char buf[1024];
#ifdef __APPLE__
			sprintf(buf,"MacOSX");
#else
			sprintf(buf,"Unix");
#endif
			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,
					ptm->tm_mday,
					ptm->tm_mon+1,
					ptm->tm_year+1900,
					ptm->tm_hour,
					ptm->tm_min,
					ptm->tm_sec);

#endif

			sprintf(csvFileName,"%s.csv",prefixFileName);
			sprintf(detailsFileName,"%s.txt",prefixFileName);
			printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName);

			//GetSystemTime(&time2);

			csvFile=fopen(csvFileName,"w");
			detailsFile = fopen(detailsFileName,"w");
			if (detailsFile)
				defaultOutput = detailsFile;

			//if (f)
			//	fprintf(f,"%s (%dx%dx%d=%d),\n",  g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ);
		}


		fprintf(defaultOutput,"Demo settings:\n");
		fprintf(defaultOutput,"  SelectedDemo=%d, demoname = %s\n", selectedDemo, demo->getName());
		fprintf(defaultOutput,"  x_dim=%d, y_dim=%d, z_dim=%d\n",ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ);
		fprintf(defaultOutput,"  x_gap=%f, y_gap=%f, z_gap=%f\n",ci.gapX,ci.gapY,ci.gapZ);
		fprintf(defaultOutput,"\nOpenCL settings:\n");
		fprintf(defaultOutput,"  Preferred cl_device index %d\n", ci.preferredOpenCLDeviceIndex);
		fprintf(defaultOutput,"  Preferred cl_platform index%d\n", ci.preferredOpenCLPlatformIndex);
		fprintf(defaultOutput,"\n");

		if (demo->getInternalData()->m_platformId)
		{
			b3OpenCLUtils::printPlatformInfo( demo->getInternalData()->m_platformId);
			fprintf(defaultOutput,"\n");
			b3OpenCLUtils::printDeviceInfo( demo->getInternalData()->m_clDevice);
			fprintf(defaultOutput,"\n");
		}
		do
		{


			GLint err = glGetError();
			assert(err==GL_NO_ERROR);


			if (exportFrame || exportMovie)
			{

				if (!renderTexture)
				{
					renderTexture = new GLRenderToTexture();
					GLuint renderTextureId;
					glGenTextures(1, &renderTextureId);

					// "Bind" the newly created texture : all future texture functions will modify this texture
					glBindTexture(GL_TEXTURE_2D, renderTextureId);

					// Give an empty image to OpenGL ( the last "0" )
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0);
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);
					glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);

					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

					renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR);
				}

				bool result = renderTexture->enable();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			b3ProfileManager::Reset();
			b3ProfileManager::Increment_Frame_Counter();

//			render.reshape(g_OpenGLWidth,g_OpenGLHeight);
			ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
			prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			window->startRendering();

			err = glGetError();
			assert(err==GL_NO_ERROR);

			glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
			glEnable(GL_DEPTH_TEST);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			if (!gPause)
			{
				B3_PROFILE("clientMoveAndDisplay");

				demo->clientMoveAndDisplay();
			}
			else
			{

			}

			{
				B3_PROFILE("renderScene");
				demo->renderScene();
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			/*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects())
			{
				B3_PROFILE("renderPhysicsWorld");
				b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray();
				b3CollisionObject** colObjArray = &arr[0];

				render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly);
				syncOnly = true;

			}
			*/


			if (exportFrame || exportMovie)
			{

				char fileName[1024];
				sprintf(fileName,"screenShot%d.png",frameIndex++);
				writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName);
				exportFrame = false;
				renderTexture->disable();
			}


			{
				B3_PROFILE("gui->draw");
				if (gui && gDrawGui)
					gui->draw(g_OpenGLWidth,g_OpenGLHeight);
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			{
				B3_PROFILE("window->endRendering");
				window->endRendering();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			{
				B3_PROFILE("glFinish");
			}



			if (dump_timings)
			{
				b3ProfileManager::dumpAll(stdout);
			}

			if (csvFile)
			{
				static int frameCount=0;

				if (frameCount>0)
				{
					DumpSimulationTime(csvFile);
					if (detailsFile)
					{
							fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount);
							b3ProfileManager::dumpAll(detailsFile);
					}
				}

				if (frameCount>=maxFrameCount)
					window->setRequestExit();
				frameCount++;
			}


			if (gStep)
				gPause=true;

		} while (!window->requestedExit() && !gReset);


		demo->exitPhysics();
		b3ProfileManager::CleanupMemory();
		delete ci.m_instancingRenderer;

		delete demo;
		sDemo = 0;

		if (detailsFile)
		{
			fclose(detailsFile);
			detailsFile=0;
		}
		if (csvFile)
		{
			fclose(csvFile);
			csvFile=0;
		}
	}



	} while (gReset);


	if (gui)
		gui->setComboBoxCallback(0);

	{



		delete gui;
		gui=0;

		exitFont();


		window->closeWindow();
		delete window;
		window = 0;

	}

	return 0;
}
inline int b3GpuPgsContactSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
	
	B3_PROFILE("sortConstraintByBatch2");
	

	
	bodyUsed2.resize(2*simdWidth);

	for (int q=0;q<2*simdWidth;q++)
		bodyUsed2[q]=0;

	int curBodyUsed = 0;

	int numIter = 0;
    
	m_data->m_sortData.resize(numConstraints);
	m_data->m_idxBuffer.resize(numConstraints);
	m_data->m_old.resize(numConstraints);
	
	unsigned int* idxSrc = &m_data->m_idxBuffer[0];
		
#if defined(_DEBUG)
	for(int i=0; i<numConstraints; i++)
		cs[i].getBatchIdx() = -1;
#endif
	for(int i=0; i<numConstraints; i++) 
		idxSrc[i] = i;
    
	int numValidConstraints = 0;
	int unprocessedConstraintIndex = 0;

	int batchIdx = 0;
    

	{
		B3_PROFILE("cpu batch innerloop");
		
		while( numValidConstraints < numConstraints)
		{
			numIter++;
			int nCurrentBatch = 0;
			//	clear flag
			for(int i=0; i<curBodyUsed; i++) 
				bodyUsed2[i] = 0;
            curBodyUsed = 0;

			for(int i=numValidConstraints; i<numConstraints; i++)
			{
				int idx = idxSrc[i];
				b3Assert( idx < numConstraints );
				//	check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
				int aUnavailable = 0;
				int bUnavailable = 0;
				if (!aIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyA == bodyUsed2[j])
						{
							aUnavailable=1;
							break;
						}
					}
				}
				if (!aUnavailable)
				if (!bIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyB == bodyUsed2[j])
						{
							bUnavailable=1;
							break;
						}
					}
				}
                
				if( aUnavailable==0 && bUnavailable==0 ) // ok
				{
					if (!aIsStatic)
					{
						bodyUsed2[curBodyUsed++] = bodyA;
					}
					if (!bIsStatic)
					{
						bodyUsed2[curBodyUsed++] = bodyB;
					}

					cs[idx].getBatchIdx() = batchIdx;
					m_data->m_sortData[idx].m_key = batchIdx;
					m_data->m_sortData[idx].m_value = idx;

					if (i!=numValidConstraints)
					{
						b3Swap(idxSrc[i], idxSrc[numValidConstraints]);
					}

					numValidConstraints++;
					{
						nCurrentBatch++;
						if( nCurrentBatch == simdWidth )
						{
							nCurrentBatch = 0;
							for(int i=0; i<curBodyUsed; i++) 
								bodyUsed2[i] = 0;

							
							curBodyUsed = 0;
						}
					}
				}
			}
			
			batchIdx ++;
		}
	}
	{
		B3_PROFILE("quickSort");
		//m_data->m_sortData.quickSort(sortfnc);
	}

	{
        B3_PROFILE("reorder");
		//	reorder
		
		memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints);

		for(int i=0; i<numConstraints; i++)
		{
			b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]);
			int idx = m_data->m_sortData[idxSrc[i]].m_value;
			cs[i] = m_data->m_old[idx];
		}
	}
	
#if defined(_DEBUG)
    //		debugPrintf( "nBatches: %d\n", batchIdx );
	for(int i=0; i<numConstraints; i++)
    {
        b3Assert( cs[i].getBatchIdx() != -1 );
    }
#endif

	
	return batchIdx;
}
inline int b3GpuPgsContactSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
{
	
	B3_PROFILE("sortConstraintByBatch");
	int numIter = 0;
    
	sortData.resize(n);
	idxBuffer.resize(n);
	old.resize(n);
	
	unsigned int* idxSrc = &idxBuffer[0];
	unsigned int* idxDst = &idxBuffer[0];
	int nIdxSrc, nIdxDst;
    
	const int N_FLG = 256;
	const int FLG_MASK = N_FLG-1;
	unsigned int flg[N_FLG/32];
#if defined(_DEBUG)
	for(int i=0; i<n; i++)
		cs[i].getBatchIdx() = -1;
#endif
	for(int i=0; i<n; i++) 
		idxSrc[i] = i;
	nIdxSrc = n;
    
	int batchIdx = 0;
    
	{
		B3_PROFILE("cpu batch innerloop");
		while( nIdxSrc )
		{
			numIter++;
			nIdxDst = 0;
			int nCurrentBatch = 0;
            
			//	clear flag
			for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
            
			for(int i=0; i<nIdxSrc; i++)
			{
				int idx = idxSrc[i];
				

				b3Assert( idx < n );
				//	check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
                
				
                
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
                
				int aIdx = bodyA & FLG_MASK;
				int bIdx = bodyB & FLG_MASK;
                
				unsigned int aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31));
				unsigned int bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31));
                
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;

                //use inv_mass!
				aUnavailable = !aIsStatic? aUnavailable:0;//
				bUnavailable = !bIsStatic? bUnavailable:0;
                
				if( aUnavailable==0 && bUnavailable==0 ) // ok
				{
					if (!aIsStatic)
						flg[ aIdx/32 ] |= (1<<(aIdx&31));
					if (!bIsStatic)
						flg[ bIdx/32 ] |= (1<<(bIdx&31));

					cs[idx].getBatchIdx() = batchIdx;
					sortData[idx].m_key = batchIdx;
					sortData[idx].m_value = idx;
                    
					{
						nCurrentBatch++;
						if( nCurrentBatch == simdWidth )
						{
							nCurrentBatch = 0;
							for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
						}
					}
				}
				else
				{
					idxDst[nIdxDst++] = idx;
				}
			}
			b3Swap( idxSrc, idxDst );
			b3Swap( nIdxSrc, nIdxDst );
			batchIdx ++;
		}
	}
	{
		B3_PROFILE("quickSort");
		sortData.quickSort(sortfnc);
	}
	
	
	{
        B3_PROFILE("reorder");
		//	reorder
		
		memcpy( &old[0], cs, sizeof(b3Contact4)*n);
		for(int i=0; i<n; i++)
		{
			int idx = sortData[i].m_value;
			cs[i] = old[idx];
		}
	}
    
	
#if defined(_DEBUG)
    //		debugPrintf( "nBatches: %d\n", batchIdx );
	for(int i=0; i<n; i++)
    {
        b3Assert( cs[i].getBatchIdx() != -1 );
    }
#endif
	return batchIdx;
}
void OpenGLExampleBrowser::update(float deltaTime)
{
	b3ChromeUtilsEnableProfiling();
	
		B3_PROFILE("OpenGLExampleBrowser::update");
		assert(glGetError()==GL_NO_ERROR);
		s_instancingRenderer->init();
        DrawGridData dg;
        dg.upAxis = s_app->getUpAxis();

        {
            BT_PROFILE("Update Camera and Light");

	

            s_instancingRenderer->updateCamera(dg.upAxis);
        }

		
		static int frameCount = 0;
		frameCount++;

		if (0)
		{
            BT_PROFILE("Draw frame counter");
            char bla[1024];
            sprintf(bla,"Frame %d", frameCount);
            s_app->drawText(bla,10,10);
		}

    if (gPngFileName)
    {
        
        static int skip = 0;
        skip--;
        if (skip<0)
        {
            skip=gPngSkipFrames;
            //printf("gPngFileName=%s\n",gPngFileName);
            static int s_frameCount = 100;
            
            sprintf(staticPngFileName,"%s%d.png",gPngFileName,s_frameCount++);
            //b3Printf("Made screenshot %s",staticPngFileName);
            s_app->dumpNextFrameToPng(staticPngFileName);
            glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
        }
    }

		
		if (sCurrentDemo)
		{
			if (!pauseSimulation || singleStepSimulation)
			{
				
				//printf("---------------------------------------------------\n");
				//printf("Framecount = %d\n",frameCount);
				B3_PROFILE("sCurrentDemo->stepSimulation");

				if (gFixedTimeStep>0)
				{
				
					sCurrentDemo->stepSimulation(gFixedTimeStep);
				} else
				{
					sCurrentDemo->stepSimulation(deltaTime);//1./60.f);
				}
			}
			
			if (renderGrid)
            {
                BT_PROFILE("Draw Grid");
                glPolygonOffset(3.0, 3);
                glEnable(GL_POLYGON_OFFSET_FILL);
                s_app->drawGrid(dg);
                
            }
			if (renderVisualGeometry && ((gDebugDrawFlags&btIDebugDraw::DBG_DrawWireframe)==0))
            {
				if (visualWireframe)
				{
					glPolygonMode( GL_FRONT_AND_BACK, GL_LINE );
				}
                BT_PROFILE("Render Scene");
                sCurrentDemo->renderScene();
            }
			//else
            {
				B3_PROFILE("physicsDebugDraw");
				glPolygonMode( GL_FRONT_AND_BACK, GL_FILL );
                sCurrentDemo->physicsDebugDraw(gDebugDrawFlags);
            }
		}

    

		{
			
			if (gui2 && s_guiHelper && s_guiHelper->getRenderInterface() && s_guiHelper->getRenderInterface()->getActiveCamera())
			{
				B3_PROFILE("setStatusBarMessage");
				char msg[1024];
				float camDist = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraDistance();
				float pitch = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraPitch();
				float yaw = s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraYaw();
				float camTarget[3];
				s_guiHelper->getRenderInterface()->getActiveCamera()->getCameraTargetPosition(camTarget);
				sprintf(msg,"dist=%f, pitch=%f, yaw=%f,target=%f,%f,%f", camDist,pitch,yaw,camTarget[0],camTarget[1],camTarget[2]);
				gui2->setStatusBarMessage(msg, true);	
			}
			
		}

		static int toggle = 1;
		if (renderGui)
		{
			B3_PROFILE("renderGui");
#ifndef BT_NO_PROFILE

			if (!pauseSimulation || singleStepSimulation)
			{
				if (isProfileWindowVisible(s_profWindow))
				{
				    processProfileData(s_profWindow,false);
				}
			}
#endif //#ifndef BT_NO_PROFILE

			
			if (sUseOpenGL2)
			{

				saveOpenGLState(s_instancingRenderer->getScreenWidth(), s_instancingRenderer->getScreenHeight());
			}
			
			if (m_internalData->m_gui)
			{
				gBlockGuiMessages = true;
				m_internalData->m_gui->draw(s_instancingRenderer->getScreenWidth(), s_instancingRenderer->getScreenHeight());
				

				gBlockGuiMessages = false;
			}
			
            if (sUseOpenGL2)
            {
                restoreOpenGLState();
            }

		}
	
	singleStepSimulation = false;
	
				
		toggle=1-toggle;
        {
            BT_PROFILE("Sync Parameters");
			if (s_parameterInterface)
			{
				s_parameterInterface->syncParameters();
			}
        }
        {
            BT_PROFILE("Swap Buffers");
            s_app->swapBuffer();
        }
	
		if (gui2)
		{
			B3_PROFILE("forceUpdateScrollBars");
			gui2->forceUpdateScrollBars();
		}

}
Beispiel #24
0
int main(int argc, char* argv[])
{

	b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile);
	b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile);

	b3SetCustomPrintfFunc(myprintf);
	b3Vector3 test=b3MakeVector3(1,2,3);
	test.x = 1;
	test.y = 4;

	b3Printf("main start");

	b3CommandLineArgs args(argc,argv);


	if (args.CheckCmdLineFlag("help"))
	{
		Usage();
		return 0;
	}


	args.GetCmdLineArgument("selected_demo",selectedDemo);


	bool benchmark=args.CheckCmdLineFlag("benchmark");
	args.GetCmdLineArgument("max_framecount",maxFrameCount);



	dump_timings=args.CheckCmdLineFlag("dump_timings");
	
#ifndef B3_NO_PROFILE
	b3ProfileManager::Reset();
#endif //B3_NO_PROFILE


	window = new b3gDefaultOpenGLWindow();

	b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight);

	window->createWindow(wci);
	window->setResizeCallback(MyResizeCallback);
	window->setMouseMoveCallback(MyMouseMoveCallback);
	window->setMouseButtonCallback(MyMouseButtonCallback);
	window->setKeyboardCallback(MyKeyboardCallback);

	window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org");
	


#ifndef __APPLE__
	glewInit();
#endif

	gui = new GwenUserInterface();

    b3Printf("started GwenUserInterface\n");


	GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight);

	stash = initFont(&prim);


	if (gui)
	{
		gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale());

		b3Printf("init fonts\n");


		gui->setToggleButtonCallback(MyButtonCallback);

		gui->registerToggleButton(MYPAUSE,"Pause");
		gui->registerToggleButton(MYPROFILE,"Profile");
		gui->registerToggleButton(MYRESET,"Reset");

		int numItems = sizeof(allDemos)/sizeof(CpuDemo::CreateFunc*);
		demoNames.clear();
		for (int i=0;i<numItems;i++)
		{
			CpuDemo* demo = allDemos[i]();
			demoNames.push_back(demo->getName());
			delete demo;
		}

		gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0]);
		gui->setComboBoxCallback(MyComboBoxCallback);
	}



	do
	{
		bool syncOnly = false;
		gReset = false;




	static bool once=true;


	//glClearColor(0.3f, 0.3f, 0.3f, 1.0f);
	glClearColor(1,1,1,1);
	glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);

	window->setWheelCallback(b3DefaultWheelCallback);




	{
		CpuDemo* demo = allDemos[selectedDemo]();
		sDemo = demo;
//		demo->myinit();
		bool useGpu = false;

		
		int maxObjectCapacity=1024*1024;//128*1024;
		int maxShapeCapacityInBytes=10*1024*1024;

		//maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10);

		
		CpuDemo::ConstructionInfo ci;
		ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity,maxShapeCapacityInBytes);
		ci.m_window = window;
		ci.m_gui = gui;
		ci.m_instancingRenderer->init();
		ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
		ci.m_instancingRenderer->InitShaders();
		ci.m_primRenderer = &prim;
		

//		render.init();
		

		{
			demo->initPhysics(ci);
		}



		

		

		FILE* csvFile = 0;
		FILE* detailsFile = 0;

		if (benchmark)
		{
			gPause = false;
			char prefixFileName[1024];
			char csvFileName[1024];
			char detailsFileName[1024];

			
			
			//todo: move this time stuff into the Platform/Window class
#ifdef _WIN32
			SYSTEMTIME time;
			GetLocalTime(&time);
			char buf[1024];
			DWORD dwCompNameLen = 1024;
			if (0 != GetComputerName(buf, &dwCompNameLen))
			{
				printf("%s", buf);
			} else
			{
				printf("unknown", buf);
			}

			sprintf(prefixFileName,"%s_%s_%s_date_%d-%d-%d_time_%d-%d-%d","CPU",buf,demoNames[selectedDemo],time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond);
			
#else
			timeval now;
			gettimeofday(&now,0);
			
			struct tm* ptm;
			ptm = localtime (&now.tv_sec);
			char buf[1024];
#ifdef __APPLE__
			sprintf(buf,"MacOSX");
#else
			sprintf(buf,"Unix");
#endif
			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,
					ptm->tm_mday,
					ptm->tm_mon+1,
					ptm->tm_year+1900,
					ptm->tm_hour,
					ptm->tm_min,
					ptm->tm_sec);
			
#endif

			sprintf(csvFileName,"%s.csv",prefixFileName);
			sprintf(detailsFileName,"%s.txt",prefixFileName);
			printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName);

			//GetSystemTime(&time2);

			csvFile=fopen(csvFileName,"w");
			detailsFile = fopen(detailsFileName,"w");
			if (detailsFile)
				defaultOutput = detailsFile;

			//if (f)
			//	fprintf(f,"%s (%dx%dx%d=%d),\n",  g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ);
		}

		
		
		
		do
		{


			GLint err = glGetError();
			assert(err==GL_NO_ERROR);


			if (exportFrame || exportMovie)
			{
				
				if (!renderTexture)
				{
					renderTexture = new GLRenderToTexture();
					GLuint renderTextureId;
					glGenTextures(1, &renderTextureId);

					// "Bind" the newly created texture : all future texture functions will modify this texture
					glBindTexture(GL_TEXTURE_2D, renderTextureId);

					// Give an empty image to OpenGL ( the last "0" )
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0);
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);
					glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);

					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

					renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR);
				}
				
				bool result = renderTexture->enable();
			} 
			
			err = glGetError();
			assert(err==GL_NO_ERROR);

			b3ProfileManager::Reset();
			b3ProfileManager::Increment_Frame_Counter();

//			render.reshape(g_OpenGLWidth,g_OpenGLHeight);
			ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
			prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			window->startRendering();

			err = glGetError();
			assert(err==GL_NO_ERROR);

			glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
			glEnable(GL_DEPTH_TEST);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			if (!gPause)
			{
				B3_PROFILE("clientMoveAndDisplay");

				demo->clientMoveAndDisplay();
			}
			else
			{

			}

			{
				B3_PROFILE("renderScene");
				demo->renderScene();
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			/*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects())
			{
				B3_PROFILE("renderPhysicsWorld");
				b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray();
				b3CollisionObject** colObjArray = &arr[0];

				render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly);
				syncOnly = true;

			}
			*/

			
			if (exportFrame || exportMovie)
			{
				
				char fileName[1024];
				sprintf(fileName,"screenShot%d.png",frameIndex++);
				writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName);
				exportFrame = false;
				renderTexture->disable();
			}


			{
				B3_PROFILE("gui->draw");
				if (gui && gDrawGui)
					gui->draw(g_OpenGLWidth,g_OpenGLHeight);
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			{
				B3_PROFILE("window->endRendering");
				window->endRendering();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			{
				B3_PROFILE("glFinish");
			}

			

			if (dump_timings)
			{
				b3ProfileManager::dumpAll(stdout);
			}

			if (csvFile)
			{
				static int frameCount=0;

				if (frameCount>0)
				{
					DumpSimulationTime(csvFile);
					if (detailsFile)
					{
							fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount);
							b3ProfileManager::dumpAll(detailsFile);
					}
				}

				if (frameCount>=maxFrameCount)
					window->setRequestExit();
				frameCount++;
			}


			if (gStep)
				gPause=true;

		} while (!window->requestedExit() && !gReset);


		demo->exitPhysics();
		b3ProfileManager::CleanupMemory();
		delete ci.m_instancingRenderer;

		delete demo;
		sDemo = 0;

		if (detailsFile)
		{
			fclose(detailsFile);
			detailsFile=0;
		}
		if (csvFile)
		{
			fclose(csvFile);
			csvFile=0;
		}
	}



	} while (gReset);


	if (gui)
		gui->setComboBoxCallback(0);

	{

	

		delete gui;
		gui=0;

		exitFont();


		window->closeWindow();
		delete window;
		window = 0;

	}

	return 0;
}
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
{
	B3_PROFILE("solveGroupCacheFriendlyFinish");
	//	int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
	//	int i,j;

	{
		if (gpuBreakConstraints)
		{
			B3_PROFILE("breakViolatedConstraintsKernel");
			b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_breakViolatedConstraintsKernel, "m_breakViolatedConstraintsKernel");
			launcher.setBuffer(gpuConstraints->getBufferCL());
			launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
			launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
			launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
			launcher.setConst(numConstraints);
			launcher.launch1D(numConstraints);
		}
		else
		{
			gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints);
			m_gpuData->m_gpuBatchConstraints->copyToHost(m_gpuData->m_cpuBatchConstraints);
			m_gpuData->m_gpuConstraintRows->copyToHost(m_gpuData->m_cpuConstraintRows);
			gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints);
			m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1);
			m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets);

			for (int cid = 0; cid < numConstraints; cid++)
			{
				int originalConstraintIndex = batchConstraints[cid].m_originalConstraintIndex;
				int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[originalConstraintIndex];
				int numRows = m_gpuData->m_cpuConstraintInfo1[originalConstraintIndex];
				if (numRows)
				{
					//	printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold);
					for (int i = 0; i < numRows; i++)
					{
						int rowIndex = constraintRowOffset + i;
						int orgConstraintIndex = m_gpuData->m_cpuConstraintRows[rowIndex].m_originalConstraintIndex;
						float breakingThreshold = m_gpuData->m_cpuConstraints[orgConstraintIndex].m_breakingImpulseThreshold;
						//	printf("rows[%d].m_appliedImpulse=%f\n",rowIndex,rows[rowIndex].m_appliedImpulse);
						if (b3Fabs(m_gpuData->m_cpuConstraintRows[rowIndex].m_appliedImpulse) >= breakingThreshold)
						{
							m_gpuData->m_cpuConstraints[orgConstraintIndex].m_flags = 0;  //&= ~B3_CONSTRAINT_FLAG_ENABLED;
						}
					}
				}
			}

			gpuConstraints->copyFromHost(m_gpuData->m_cpuConstraints);
		}
	}

	{
		if (useGpuWriteBackVelocities)
		{
			B3_PROFILE("GPU write back velocities and transforms");

			b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_writeBackVelocitiesKernel, "m_writeBackVelocitiesKernel");
			launcher.setBuffer(gpuBodies->getBufferCL());
			launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
			launcher.setConst(numBodies);
			launcher.launch1D(numBodies);
			clFinish(m_gpuData->m_queue);
			//			m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
			//			m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies);
			//m_gpuData->m_gpuBodies->copyToHost(testBodies);
		}
		else
		{
			B3_PROFILE("CPU write back velocities and transforms");

			m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
			gpuBodies->copyToHost(m_gpuData->m_cpuBodies);
			for (int i = 0; i < m_tmpSolverBodyPool.size(); i++)
			{
				int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex;
				//printf("bodyIndex=%d\n",bodyIndex);
				b3Assert(i == bodyIndex);

				b3RigidBodyData* body = &m_gpuData->m_cpuBodies[bodyIndex];
				if (body->m_invMass)
				{
					if (infoGlobal.m_splitImpulse)
						m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp);
					else
						m_tmpSolverBodyPool[i].writebackVelocity();

					if (m_usePgs)
					{
						body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity;
						body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity;
					}
					else
					{
						b3Assert(0);
					}
					/*			
					if (infoGlobal.m_splitImpulse)
					{
						body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin();
						b3Quaternion orn;
						orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation();
						body->m_quat = orn;
					}
					*/
				}
			}  //for

			gpuBodies->copyFromHost(m_gpuData->m_cpuBodies);
		}
	}

	clFinish(m_gpuData->m_queue);

	m_tmpSolverContactConstraintPool.resizeNoInitialize(0);
	m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0);
	m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0);
	m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0);

	m_tmpSolverBodyPool.resizeNoInitialize(0);
	return 0.f;
}
void PhysicsServerExample::renderScene()
{
	B3_PROFILE("PhysicsServerExample::RenderScene");
	static char line0[1024];
		static char line1[1024];

	if (gEnableRealTimeSimVR)
	{
		
		static int frameCount=0;
		static btScalar prevTime = m_clock.getTimeSeconds();
		frameCount++;
		
		static btScalar worseFps = 1000000;
		int numFrames = 200;
		static int count = 0;
		count++;

		if (0 == (count & 1))
		{
			btScalar curTime = m_clock.getTimeSeconds();
			btScalar fps = 1. / (curTime - prevTime);
			prevTime = curTime;
			if (fps < worseFps)
			{
				worseFps = fps;
			}

			if (count > numFrames)
			{
				count = 0;
				sprintf(line0, "fps:%f frame:%d", worseFps, frameCount / 2);
				sprintf(line1, "drop:%d tscale:%f dt:%f, substep %f)", gDroppedSimulationSteps, simTimeScalingFactor,gDtInSec, gSubStep);
				gDroppedSimulationSteps = 0;

				worseFps = 1000000;
			}
		}

#ifdef BT_ENABLE_VR
		if ((gInternalSimFlags&2 ) && m_tinyVrGui==0)
		{
			ComboBoxParams comboParams;
        comboParams.m_comboboxId = 0;
        comboParams.m_numItems = 0;
        comboParams.m_startItem = 0;
        comboParams.m_callback = 0;//MyComboBoxCallback;
        comboParams.m_userPointer = 0;//this;
        
			m_tinyVrGui = new TinyVRGui(comboParams,this->m_multiThreadedHelper->m_childGuiHelper->getRenderInterface());
			m_tinyVrGui->init();
		}

		if (m_tinyVrGui)
		{

			b3Transform tr;tr.setIdentity();
			tr.setOrigin(b3MakeVector3(gVRController2Pos[0],gVRController2Pos[1],gVRController2Pos[2]));
			tr.setRotation(b3Quaternion(gVRController2Orn[0],gVRController2Orn[1],gVRController2Orn[2],gVRController2Orn[3]));
			tr = tr*b3Transform(b3Quaternion(0,0,-SIMD_HALF_PI),b3MakeVector3(0,0,0));
			b3Scalar dt = 0.01;
			m_tinyVrGui->clearTextArea();
			
			m_tinyVrGui->grapicalPrintf(line0,0,0,0,0,0,255);
			m_tinyVrGui->grapicalPrintf(line1,0,16,255,255,255,255);

			m_tinyVrGui->tick(dt,tr);
		}
#endif//BT_ENABLE_VR
	}
	///debug rendering
	//m_args[0].m_cs->lock();
	
	//gVRTeleportPos[0] += 0.01;
	vrOffset[12]=-gVRTeleportPos[0];
	vrOffset[13]=-gVRTeleportPos[1];
	vrOffset[14]=-gVRTeleportPos[2];

	this->m_multiThreadedHelper->m_childGuiHelper->getRenderInterface()->
		getActiveCamera()->setVRCameraOffsetTransform(vrOffset);

	m_physicsServer.renderScene();
	
	for (int i=0;i<MAX_VR_CONTROLLERS;i++)
	{
		if (m_args[0].m_isVrControllerPicking[i] || m_args[0].m_isVrControllerDragging[i])
		{
			btVector3 from = m_args[0].m_vrControllerPos[i];
			btMatrix3x3 mat(m_args[0].m_vrControllerOrn[i]);
	
			btVector3 toX = from+mat.getColumn(0);
			btVector3 toY = from+mat.getColumn(1);
			btVector3 toZ = from+mat.getColumn(2);
	
			int width = 2;

	
			btVector4 color;
			color=btVector4(1,0,0,1);
			m_guiHelper->getAppInterface()->m_renderer->drawLine(from,toX,color,width);
			color=btVector4(0,1,0,1);
			m_guiHelper->getAppInterface()->m_renderer->drawLine(from,toY,color,width);
			color=btVector4(0,0,1,1);
			m_guiHelper->getAppInterface()->m_renderer->drawLine(from,toZ,color,width);
	
		}
	}

	if (m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->isVRCamera())
	{
		gEnableRealTimeSimVR = true;
	}

	if (gDebugRenderToggle)
	if (m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->isVRCamera())
	{
		
		B3_PROFILE("Draw Debug HUD");
		//some little experiment to add text/HUD to a VR camera (HTC Vive/Oculus Rift)


		float pos[4];
		m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->getCameraTargetPosition(pos);
		pos[0]+=gVRTeleportPos[0];
		pos[1]+=gVRTeleportPos[1];
		pos[2]+=gVRTeleportPos[2];

		btTransform viewTr;
		btScalar m[16];
		float mf[16];
		m_guiHelper->getAppInterface()->m_renderer->getActiveCamera()->getCameraViewMatrix(mf);
		for (int i=0;i<16;i++)
		{
			m[i] = mf[i];
		}
		m[12]=+gVRTeleportPos[0];
		m[13]=+gVRTeleportPos[1];
		m[14]=+gVRTeleportPos[2];
		viewTr.setFromOpenGLMatrix(m);
		btTransform viewTrInv = viewTr.inverse();
		
		btVector3 side = viewTrInv.getBasis().getColumn(0);
		btVector3 up = viewTrInv.getBasis().getColumn(1);
		btVector3 fwd = viewTrInv.getBasis().getColumn(2);

		
		float upMag = 0;
		float sideMag = 2.2;
		float fwdMag = -4;

		m_guiHelper->getAppInterface()->drawText3D(line0,pos[0]+upMag*up[0]-sideMag*side[0]+fwdMag*fwd[0],pos[1]+upMag*up[1]-sideMag*side[1]+fwdMag*fwd[1],pos[2]+upMag*up[2]-sideMag*side[2]+fwdMag*fwd[2],1);
		//btVector3 fwd = viewTrInv.getBasis().getColumn(2);
		
		up = viewTrInv.getBasis().getColumn(1);
		upMag = -0.3;
		
		
		
		m_guiHelper->getAppInterface()->drawText3D(line1,pos[0]+upMag*up[0]-sideMag*side[0]+fwdMag*fwd[0],pos[1]+upMag*up[1]-sideMag*side[1]+fwdMag*fwd[1],pos[2]+upMag*up[2]-sideMag*side[2]+fwdMag*fwd[2],1);
	}

	//m_args[0].m_cs->unlock();
}
Beispiel #27
0
void b3Solver::solveContactConstraint(  const b3OpenCLArray<b3RigidBodyCL>* bodyBuf, const b3OpenCLArray<b3InertiaCL>* shapeBuf,
                                        b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
{


    b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
    {

        const int nn = N_SPLIT*N_SPLIT;

        cdata.x = 0;
        cdata.y = maxNumBatches;//250;


        int numWorkItems = 64*nn/N_BATCHES;
#ifdef DEBUG_ME
        SolverDebugInfo* debugInfo = new  SolverDebugInfo[numWorkItems];
        adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
#endif



        {

            B3_PROFILE("m_batchSolveKernel iterations");
            for(int iter=0; iter<m_nIterations; iter++)
            {
                for(int ib=0; ib<N_BATCHES; ib++)
                {

                    if (verify)
                    {
                        checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib);
                    }

#ifdef DEBUG_ME
                    memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
                    gpuDebugInfo.write(debugInfo,numWorkItems);
#endif


                    cdata.z = ib;
                    cdata.w = N_SPLIT;

                    b3LauncherCL launcher( m_queue, m_solveContactKernel );
#if 1

                    b3BufferInfoCL bInfo[] = {

                        b3BufferInfoCL( bodyBuf->getBufferCL() ),
                        b3BufferInfoCL( shapeBuf->getBufferCL() ),
                        b3BufferInfoCL( constraint->getBufferCL() ),
                        b3BufferInfoCL( m_numConstraints->getBufferCL() ),
                        b3BufferInfoCL( m_offsets->getBufferCL() )
#ifdef DEBUG_ME
                        ,	b3BufferInfoCL(&gpuDebugInfo)
#endif
                    };



                    launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
                    //launcher.setConst(  cdata.x );
                    launcher.setConst(  cdata.y );
                    launcher.setConst(  cdata.z );
                    launcher.setConst(  cdata.w );
                    launcher.launch1D( numWorkItems, 64 );


#else
                    const char* fileName = "m_batchSolveKernel.bin";
                    FILE* f = fopen(fileName,"rb");
                    if (f)
                    {
                        int sizeInBytes=0;
                        if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET))
                        {
                            printf("error, cannot get file size\n");
                            exit(0);
                        }

                        unsigned char* buf = (unsigned char*) malloc(sizeInBytes);
                        fread(buf,sizeInBytes,1,f);
                        int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context);
                        int num = *(int*)&buf[serializedBytes];

                        launcher.launch1D( num);

                        //this clFinish is for testing on errors
                        clFinish(m_queue);
                    }

#endif


#ifdef DEBUG_ME
                    clFinish(m_queue);
                    gpuDebugInfo.read(debugInfo,numWorkItems);
                    clFinish(m_queue);
                    for (int i=0; i<numWorkItems; i++)
                    {
                        if (debugInfo[i].m_valInt2>0)
                        {
                            printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2);
                        }

                        if (debugInfo[i].m_valInt3>0)
                        {
                            printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3);
                        }
                    }
#endif //DEBUG_ME


                }
            }

            clFinish(m_queue);


        }

        cdata.x = 1;
        bool applyFriction=true;
        if (applyFriction)
        {
            B3_PROFILE("m_batchSolveKernel iterations2");
            for(int iter=0; iter<m_nIterations; iter++)
            {
                for(int ib=0; ib<N_BATCHES; ib++)
                {
                    cdata.z = ib;
                    cdata.w = N_SPLIT;

                    b3BufferInfoCL bInfo[] = {
                        b3BufferInfoCL( bodyBuf->getBufferCL() ),
                        b3BufferInfoCL( shapeBuf->getBufferCL() ),
                        b3BufferInfoCL( constraint->getBufferCL() ),
                        b3BufferInfoCL( m_numConstraints->getBufferCL() ),
                        b3BufferInfoCL( m_offsets->getBufferCL() )
#ifdef DEBUG_ME
                        ,b3BufferInfoCL(&gpuDebugInfo)
#endif //DEBUG_ME
                    };
                    b3LauncherCL launcher( m_queue, m_solveFrictionKernel );
                    launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
                    //launcher.setConst(  cdata.x );
                    launcher.setConst(  cdata.y );
                    launcher.setConst(  cdata.z );
                    launcher.setConst(  cdata.w );

                    launcher.launch1D( 64*nn/N_BATCHES, 64 );
                }
            }
            clFinish(m_queue);

        }
#ifdef DEBUG_ME
        delete[] debugInfo;
#endif //DEBUG_ME
    }


}
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index)
{
	B3_PROFILE("solveContacts");
	m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
	m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
	m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts);

	if (optionalSortContactsDeterminism)
	{
		if (!gCpuSortContactsDeterminism)
		{
			B3_PROFILE("GPU Sort contact constraints (determinism)");

			m_data->m_pBufContactOutGPUCopy->resize(numContacts);
			m_data->m_contactKeyValues->resize(numContacts);

			m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);

			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
						
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}

			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);

			{
				B3_PROFILE("gpu reorderContactKernel (determinism)");
                                
				b3Int4 cdata;
				cdata.x = numContacts;
                                
				//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
				//	, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
				b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst( cdata );
				launcher.launch1D( numContacts, 64 );
            }

		} else
		{
			B3_PROFILE("CPU Sort contact constraints (determinism)");
			b3AlignedObjectArray<b3Contact4> cpuConstraints;
			m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints);
			bool sort = true;
			if (sort)
			{
				cpuConstraints.quickSort(b3ContactCmp);

				for (int i=0;i<cpuConstraints.size();i++)
				{
					cpuConstraints[i].m_batchIdx = i;
				}
			}
			m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints);
			if (m_debugOutput==100)
			{
				for (int i=0;i<cpuConstraints.size();i++)
				{
					printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx);
				}
			}

			m_debugOutput++;
		}
	}
	



	int nContactOut = m_data->m_pBufContactOutGPU->size();

	bool useSolver = true;
	

    if (useSolver)
    {
        float dt=1./60.;
        b3ConstraintCfg csCfg( dt );
        csCfg.m_enableParallelSolve = true;
        csCfg.m_batchCellSize = 6;
        csCfg.m_staticIdx = static0Index;
        
        
        b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU;

        void* additionalData = 0;//m_data->m_frictionCGPU;
        const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU;
        b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
        int nContacts = nContactOut;
        
        
		int maxNumBatches = 0;
 
		if (!gUseLargeBatches)
        {
            
            if( m_data->m_solverGPU->m_contactBuffer2)
            {
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
            
            if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
            {
				m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
			
            //clFinish(m_data->m_queue);
            
            
            
			{
				B3_PROFILE("batching");
				//@todo: just reserve it, without copy of original contact (unless we use warmstarting)



				const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf;


				{

					//b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
					//b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );

					const int sortAlignment = 512; // todo. get this out of sort
					if( csCfg.m_enableParallelSolve )
					{


						int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment );

						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;


						if (!gCpuSetSortData)
						{	//	2. set cell idx
							B3_PROFILE("GPU set cell idx");
							struct CB
							{
								int m_nContacts;
								int m_staticIdx;
								float m_scale;
								b3Int4 m_nSplit;
							};

							b3Assert( sortSize%64 == 0 );
							CB cdata;
							cdata.m_nContacts = nContacts;
							cdata.m_staticIdx = csCfg.m_staticIdx;
							cdata.m_scale = 1.f/csCfg.m_batchCellSize;
							cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X;
							cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y;
							cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z;

							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);


							b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
							b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" );
							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
							launcher.setConst( cdata.m_nContacts );
							launcher.setConst( cdata.m_scale );
							launcher.setConst(cdata.m_nSplit);
							launcher.setConst(cdata.m_staticIdx);


							launcher.launch1D( sortSize, 64 );
						} else
						{
							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
							b3AlignedObjectArray<b3SortData> sortDataCPU;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU);

							b3AlignedObjectArray<b3Contact4> contactCPU;
							m_data->m_pBufContactOutGPU->copyToHost(contactCPU);
							b3AlignedObjectArray<b3RigidBodyData> bodiesCPU;
							bodyBuf->copyToHost(bodiesCPU);
							float scale = 1.f/csCfg.m_batchCellSize;
							b3Int4 nSplit;
							nSplit.x = B3_SOLVER_N_SPLIT_X;
							nSplit.y = B3_SOLVER_N_SPLIT_Y;
							nSplit.z = B3_SOLVER_N_SPLIT_Z;

							SetSortDataCPU(&contactCPU[0],  &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx);


							m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU);
						}



						if (!gCpuRadixSort)
						{	//	3. sort by cell idx
							B3_PROFILE("gpuRadixSort");
							//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
							//int sortBit = 32;
							//if( n <= 0xffff ) sortBit = 16;
							//if( n <= 0xff ) sortBit = 8;
							//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
							//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut);



						} else
						{
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							b3AlignedObjectArray<b3SortData> hostValues;
							keyValuesInOut.copyToHost(hostValues);
							hostValues.quickSort(sortfnc);
							keyValuesInOut.copyFromHost(hostValues);
						}


						if (gUseScanHost)
						{
							//	4. find entries
							B3_PROFILE("cpuBoundSearch");
							b3AlignedObjectArray<unsigned int> countsHost;
							countsNative->copyToHost(countsHost);

							b3AlignedObjectArray<b3SortData> sortDataHost;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);


							//m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);

							countsNative->copyFromHost(countsHost);


							//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
							//	B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );

							//unsigned int sum;
							//m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
							b3AlignedObjectArray<unsigned int> offsetsHost;
							offsetsHost.resize(offsetsNative->size());


							m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum );
							offsetsNative->copyFromHost(offsetsHost);

							//printf("sum = %d\n",sum);
						}  else
						{
							//	4. find entries
							B3_PROFILE("gpuBoundSearch");
							m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
						} 




						if (nContacts)
						{	//	5. sort constraints by cellIdx
							if (gReorderContactsOnCpu)
							{
								B3_PROFILE("cpu m_reorderContactKernel");
								b3AlignedObjectArray<b3SortData> sortDataHost;
								m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);
								b3AlignedObjectArray<b3Contact4> inContacts;
								b3AlignedObjectArray<b3Contact4> outContacts;
								m_data->m_pBufContactOutGPU->copyToHost(inContacts);
								outContacts.resize(inContacts.size());
								for (int i=0;i<nContacts;i++)
								{
									int srcIdx = sortDataHost[i].y;
									outContacts[i] = inContacts[srcIdx];
								}
								m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts);

								/*								"void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
								"{\n"
								"	int nContacts = cb.x;\n"
								"	int gIdx = GET_GLOBAL_IDX;\n"
								"	if( gIdx < nContacts )\n"
								"	{\n"
								"		int srcIdx = sortData[gIdx].y;\n"
								"		out[gIdx] = in[srcIdx];\n"
								"	}\n"
								"}\n"
								*/
							} else
							{
								B3_PROFILE("gpu m_reorderContactKernel");

								b3Int4 cdata;
								cdata.x = nContacts;

								b3BufferInfoCL bInfo[] = { 
									b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), 
									b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
									, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };

									b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
									launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
									launcher.setConst( cdata );
									launcher.launch1D( nContacts, 64 );
							}
						}




					}

				}

				//clFinish(m_data->m_queue);

				//				{
				//				b3AlignedObjectArray<unsigned int> histogram;
				//				m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
				//				printf(",,,\n");
				//				}


				if (nContacts)
				{

					if (gUseCpuCopyConstraints)
					{
						for (int i=0;i<nContacts;i++)
						{
							m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2);
							//							m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); 
							//						m_data->m_pBufContactOutGPU->getBufferCL() 
						}

					} else
					{
						B3_PROFILE("gpu m_copyConstraintKernel");
						b3Int4 cdata; cdata.x = nContacts;
						b3BufferInfoCL bInfo[] = { 
							b3BufferInfoCL(  m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), 
							b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) 
						};

						b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" );
						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
						launcher.setConst(  cdata );
						launcher.launch1D( nContacts, 64 );
						//we use the clFinish for proper benchmark/profile
						clFinish(m_data->m_queue);
					}
				}


				bool compareGPU = false;
				if (nContacts)
				{
					if (!gCpuBatchContacts)
					{
						B3_PROFILE("gpu batchContacts");
						maxNumBatches = 150;//250;
						m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
						clFinish(m_data->m_queue);
					} else
					{
						B3_PROFILE("cpu batchContacts");
						static b3AlignedObjectArray<b3Contact4> cpuContacts;
						b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
						{
							B3_PROFILE("copyToHost");
							contactsIn->copyToHost(cpuContacts);
						}
						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;

						b3AlignedObjectArray<unsigned int> nNativeHost;
						b3AlignedObjectArray<unsigned int> offsetsNativeHost;

						{
							B3_PROFILE("countsNative/offsetsNative copyToHost");
							countsNative->copyToHost(nNativeHost);
							offsetsNative->copyToHost(offsetsNativeHost);
						}


						int numNonzeroGrid=0;

						if (gUseLargeBatches)
						{
							m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
							int totalNumConstraints = cpuContacts.size();
							int simdWidth =numBodies+1;//-1;//64;//-1;//32;
							int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
							maxNumBatches = b3Max(numBatches,maxNumBatches);
							static int globalMaxBatch = 0;
							if (maxNumBatches>globalMaxBatch )
							{
								globalMaxBatch  = maxNumBatches;
								b3Printf("maxNumBatches = %d\n",maxNumBatches);
							}
								
						} else
						{
							m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES);
							B3_PROFILE("cpu batch grid");
							for(int i=0; i<B3_SOLVER_N_CELLS; i++)
							{
								int n = (nNativeHost)[i];
								int offset = (offsetsNativeHost)[i];
								if( n )
								{
									numNonzeroGrid++;
									int simdWidth =numBodies+1;//-1;//64;//-1;//32;
									int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]);	//	on GPU
									maxNumBatches = b3Max(numBatches,maxNumBatches);
									static int globalMaxBatch = 0;
									if (maxNumBatches>globalMaxBatch )
									{
										globalMaxBatch  = maxNumBatches;
										b3Printf("maxNumBatches = %d\n",maxNumBatches);
									}
									//we use the clFinish for proper benchmark/profile
									
								}
							}
							//clFinish(m_data->m_queue);
						}
						{
							B3_PROFILE("m_contactBuffer->copyFromHost");
							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
						}

					} 

				}


			


			} 


		} 


			//printf("maxNumBatches = %d\n", maxNumBatches);

		if (gUseLargeBatches)
		{
			if (nContacts)
			{
				B3_PROFILE("cpu batchContacts");
				static b3AlignedObjectArray<b3Contact4> cpuContacts;
//				b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
				{
					B3_PROFILE("copyToHost");
					m_data->m_pBufContactOutGPU->copyToHost(cpuContacts);
				}
				b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
				b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;



				int numNonzeroGrid=0;

				{
					m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
					int totalNumConstraints = cpuContacts.size();
					int simdWidth =numBodies+1;//-1;//64;//-1;//32;
					int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
					maxNumBatches = b3Max(numBatches,maxNumBatches);
					static int globalMaxBatch = 0;
					if (maxNumBatches>globalMaxBatch )
					{
						globalMaxBatch  = maxNumBatches;
						b3Printf("maxNumBatches = %d\n",maxNumBatches);
					}
								
				}
				{
					B3_PROFILE("m_contactBuffer->copyFromHost");
					m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
				}

			} 

		}

		if (nContacts)
		{
			B3_PROFILE("gpu convertToConstraints");
			m_data->m_solverGPU->convertToConstraints( bodyBuf, 
				shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
				contactConstraintOut, 
				additionalData, nContacts, 
				(b3SolverBase::ConstraintCfg&) csCfg );
			clFinish(m_data->m_queue);
		}


		if (1)
		{
			int numIter = 4;

			m_data->m_solverGPU->m_nIterations = numIter;//10
			if (!gCpuSolveConstraint)
			{
				B3_PROFILE("GPU solveContactConstraint");

				/*m_data->m_solverGPU->solveContactConstraint(
				m_data->m_bodyBufferGPU, 
				m_data->m_inertiaBufferGPU,
				m_data->m_contactCGPU,0,
				nContactOut ,
				maxNumBatches);
				*/

				//m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes);

				if (gUseLargeBatches)
				{
					solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);
				} else
				{
					solveContactConstraint(
						m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu);
				}
			}
			else
			{
				B3_PROFILE("Host solveContactConstraint");

				m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes);
			}
            
            
        }
        
        
#if 0
        if (0)
        {
            B3_PROFILE("read body velocities back to CPU");
            //read body updated linear/angular velocities back to CPU
            m_data->m_bodyBufferGPU->read(
                                                  m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
            adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL );
        }
#endif
        
    }

}
Beispiel #29
0
void	b3Solver::batchContacts(  b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx )
{

    int numWorkItems = 64*N_SPLIT*N_SPLIT;
    {
        B3_PROFILE("batch generation");

        b3Int4 cdata;
        cdata.x = nContacts;
        cdata.y = 0;
        cdata.z = staticIdx;


#ifdef BATCH_DEBUG
        SolverDebugInfo* debugInfo = new  SolverDebugInfo[numWorkItems];
        adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
        memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
        gpuDebugInfo.write(debugInfo,numWorkItems);
#endif




        b3BufferInfoCL bInfo[] = {
            b3BufferInfoCL( contacts->getBufferCL() ),
            b3BufferInfoCL(  m_contactBuffer2->getBufferCL()),
            b3BufferInfoCL( nNative->getBufferCL() ),
            b3BufferInfoCL( offsetsNative->getBufferCL() ),
#ifdef BATCH_DEBUG
            ,	b3BufferInfoCL(&gpuDebugInfo)
#endif
        };




        {
            B3_PROFILE("batchingKernel");
            //b3LauncherCL launcher( m_queue, m_batchingKernel);
            cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;

            b3LauncherCL launcher( m_queue, k);
            if (!useNewBatchingKernel )
            {
                launcher.setBuffer( contacts->getBufferCL() );
            }
            launcher.setBuffer( m_contactBuffer2->getBufferCL() );
            launcher.setBuffer( nNative->getBufferCL());
            launcher.setBuffer( offsetsNative->getBufferCL());

            //launcher.setConst(  cdata );
            launcher.setConst(staticIdx);

            launcher.launch1D( numWorkItems, 64 );
            clFinish(m_queue);
        }

#ifdef BATCH_DEBUG
        aaaa
        b3Contact4* hostContacts = new b3Contact4[nContacts];
        m_contactBuffer->read(hostContacts,nContacts);
        clFinish(m_queue);

        gpuDebugInfo.read(debugInfo,numWorkItems);
        clFinish(m_queue);

        for (int i=0; i<numWorkItems; i++)
        {
            if (debugInfo[i].m_valInt1>0)
            {
                printf("catch\n");
            }
            if (debugInfo[i].m_valInt2>0)
            {
                printf("catch22\n");
            }

            if (debugInfo[i].m_valInt3>0)
            {
                printf("catch666\n");
            }

            if (debugInfo[i].m_valInt4>0)
            {
                printf("catch777\n");
            }
        }
        delete[] debugInfo;
#endif //BATCH_DEBUG

    }

//	copy buffer to buffer
    //b3Assert(m_contactBuffer->size()==nContacts);
    //contacts->copyFromOpenCLArray( *m_contactBuffer);
    //clFinish(m_queue);//needed?



}
Beispiel #30
0
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{

//	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);

	B3_PROFILE("castRaysHost");
	for (int r=0;r<rays.size();r++)
	{
		b3Vector3 rayFrom = rays[r].m_from;
		b3Vector3 rayTo = rays[r].m_to;
		float hitFraction = hitResults[r].m_hitFraction;

		int hitBodyIndex= -1;
		b3Vector3 hitNormal;

		for (int b=0;b<numBodies;b++)
		{
				
			const b3Vector3& pos = bodies[b].m_pos;
			const b3Quaternion& orn = bodies[b].m_quat;
			
			switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
			{
			case SHAPE_SPHERE:
				{
					b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
					if (sphere_intersect(pos,  radius, rayFrom, rayTo,hitFraction))
					{
						hitBodyIndex = b;
						b3Vector3 hitPoint;
						hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
						hitNormal = (hitPoint-bodies[b].m_pos).normalize();
					}
				}
			case SHAPE_CONVEX_HULL:
				{

					b3Transform convexWorldTransform;
					convexWorldTransform.setIdentity();
					convexWorldTransform.setOrigin(bodies[b].m_pos);
					convexWorldTransform.setRotation(bodies[b].m_quat);
					b3Transform convexWorld2Local = convexWorldTransform.inverse();

					b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
					b3Vector3 rayToLocal = convexWorld2Local(rayTo);
					
					
					int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
					const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
					if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal))
					{
						hitBodyIndex = b;
					}

					
					break;
				}
			default:
				{
					static bool once=true;
					if (once)
					{
						once=false;
						b3Warning("Raytest: unsupported shape type\n");
					}
				}
			}
		}
		if (hitBodyIndex>=0)
		{

			hitResults[r].m_hitFraction = hitFraction;
			hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
			hitResults[r].m_hitNormal = hitNormal;
			hitResults[r].m_hitBody = hitBodyIndex;
		}

	}
}