Пример #1
0
///todo: add some acceleration structure (AABBs, tree etc)
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{
	
	//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);

	B3_PROFILE("castRaysGPU");

	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
	b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);

	{
		B3_PROFILE("raycast copyFromHost");
		gpuRays.copyFromHost(rays);

	
		gpuHitResults.resize(hitResults.size());
		gpuHitResults.copyFromHost(hitResults);
	}


	//run kernel
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(gpuRays.getBufferCL());
		launcher.setBuffer(gpuHitResults.getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
		
		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}

	//copy results
	{
		B3_PROFILE("raycast copyToHost");
		gpuHitResults.copyToHost(hitResults);
	}

}
Пример #2
0
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
{

//	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);

	B3_PROFILE("castRaysHost");

	for (int r=0;r<rays.size();r++)
	{
		b3Vector3 rayFrom = rays[r].m_from;
		b3Vector3 rayTo = rays[r].m_to;

		//if there is a hit, color the pixels
		bool hits  = false;

		for (int b=0;b<numBodies && !hits;b++)
		{
				
			const b3Vector3& pos = bodies[b].m_pos;
			const b3Quaternion& orn = bodies[b].m_quat;
			
			b3Scalar radius = 1;

			if (sphere_intersect(pos,  radius, rayFrom, rayTo))
				hits = true;
		}
		if (hits)
			hitResults[r].m_hitFraction = 0.f;

	}
}
Пример #3
0
void	b3GeometryUtil::getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations , b3AlignedObjectArray<b3Vector3>& verticesOut )
{
	const int numbrushes = planeEquations.size();
	// brute force:
	for (int i=0;i<numbrushes;i++)
	{
		const b3Vector3& N1 = planeEquations[i];
		

		for (int j=i+1;j<numbrushes;j++)
		{
			const b3Vector3& N2 = planeEquations[j];
				
			for (int k=j+1;k<numbrushes;k++)
			{

				const b3Vector3& N3 = planeEquations[k];

				b3Vector3 n2n3; n2n3 = N2.cross(N3);
				b3Vector3 n3n1; n3n1 = N3.cross(N1);
				b3Vector3 n1n2; n1n2 = N1.cross(N2);
				
				if ( ( n2n3.length2() > b3Scalar(0.0001) ) &&
					 ( n3n1.length2() > b3Scalar(0.0001) ) &&
					 ( n1n2.length2() > b3Scalar(0.0001) ) )
				{
					//point P out of 3 plane equations:

					//	d1 ( N2 * N3 ) + d2 ( N3 * N1 ) + d3 ( N1 * N2 )  
					//P =  -------------------------------------------------------------------------  
					//   N1 . ( N2 * N3 )  


					b3Scalar quotient = (N1.dot(n2n3));
					if (b3Fabs(quotient) > b3Scalar(0.000001))
					{
						quotient = b3Scalar(-1.) / quotient;
						n2n3 *= N1[3];
						n3n1 *= N2[3];
						n1n2 *= N3[3];
						b3Vector3 potentialVertex = n2n3;
						potentialVertex += n3n1;
						potentialVertex += n1n2;
						potentialVertex *= quotient;

						//check if inside, and replace supportingVertexOut if needed
						if (isPointInsidePlanes(planeEquations,potentialVertex,b3Scalar(0.01)))
						{
							verticesOut.push_back(potentialVertex);
						}
					}
				}
			}
		}
	}
}
Пример #4
0
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
{
	B3_PROFILE("castRaysGPU");

	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
	gpuRays.copyFromHost(rays);

	b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
	gpuHitResults.resize(hitResults.size());

	b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
	gpuBodies.resize(numBodies);
	gpuBodies.copyFromHostPointer(bodies,numBodies);

	b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
	gpuCollidables.resize(numCollidables);
	gpuCollidables.copyFromHostPointer(collidables,numCollidables);


	//run kernel
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel);
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(gpuRays.getBufferCL());
		launcher.setBuffer(gpuHitResults.getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(gpuBodies.getBufferCL());
		launcher.setBuffer(gpuCollidables.getBufferCL());

		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}

	//copy results
	gpuHitResults.copyToHost(hitResults);

}
Пример #5
0
bool notExist(const b3Vector3& planeEquation,const b3AlignedObjectArray<b3Vector3>& planeEquations)
{
	int numbrushes = planeEquations.size();
	for (int i=0;i<numbrushes;i++)
	{
		const b3Vector3& N1 = planeEquations[i];
		if (planeEquation.dot(N1) > b3Scalar(0.999))
		{
			return false;
		} 
	}
	return true;
}
Пример #6
0
static void clipEdge(const mat<4, 3, float>& triangleIn, int vertexIndexA, int vertexIndexB, b3AlignedObjectArray<Vec4f>& vertices)
{
	Vec4f v0New = triangleIn.col(vertexIndexA);
	Vec4f v1New = triangleIn.col(vertexIndexB);

	bool v0Inside = v0New[3] > 0.f && v0New[2] > -v0New[3];
	bool v1Inside = v1New[3] > 0.f && v1New[2] > -v1New[3];

	if (v0Inside && v1Inside)
	{
	}
	else if (v0Inside || v1Inside)
	{
		float d0 = v0New[2] + v0New[3];
		float d1 = v1New[2] + v1New[3];
		float factor = 1.0 / (d1 - d0);
		Vec4f newVertex = (v0New * d1 - v1New * d0) * factor;
		if (v0Inside)
		{
			v1New = newVertex;
		}
		else
		{
			v0New = newVertex;
		}
	}
	else
	{
		return;
	}

	if (vertices.size() == 0 || !(equals(vertices[vertices.size() - 1], v0New)))
	{
		vertices.push_back(v0New);
	}

	vertices.push_back(v1New);
}
Пример #7
0
bool	b3GeometryUtil::areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar	margin)
{
	int numvertices = vertices.size();
	for (int i=0;i<numvertices;i++)
	{
		const b3Vector3& N1 = vertices[i];
		b3Scalar dist = b3Scalar(planeNormal.dot(N1))+b3Scalar(planeNormal[3])-margin;
		if (dist>b3Scalar(0.))
		{
			return false;
		}
	}
	return true;
}
Пример #8
0
bool	b3GeometryUtil::isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar	margin)
{
	int numbrushes = planeEquations.size();
	for (int i=0;i<numbrushes;i++)
	{
		const b3Vector3& N1 = planeEquations[i];
		b3Scalar dist = b3Scalar(N1.dot(point))+b3Scalar(N1[3])-margin;
		if (dist>b3Scalar(0.))
		{
			return false;
		}
	}
	return true;
		
}
Пример #9
0
void	b3GeometryUtil::getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut )
{
		const int numvertices = vertices.size();
	// brute force:
	for (int i=0;i<numvertices;i++)
	{
		const b3Vector3& N1 = vertices[i];
		

		for (int j=i+1;j<numvertices;j++)
		{
			const b3Vector3& N2 = vertices[j];
				
			for (int k=j+1;k<numvertices;k++)
			{

				const b3Vector3& N3 = vertices[k];

				b3Vector3 planeEquation,edge0,edge1;
				edge0 = N2-N1;
				edge1 = N3-N1;
				b3Scalar normalSign = b3Scalar(1.);
				for (int ww=0;ww<2;ww++)
				{
					planeEquation = normalSign * edge0.cross(edge1);
					if (planeEquation.length2() > b3Scalar(0.0001))
					{
						planeEquation.normalize();
						if (notExist(planeEquation,planeEquationsOut))
						{
							planeEquation[3] = -planeEquation.dot(N1);
							
								//check if inside, and replace supportingVertexOut if needed
								if (areVerticesBehindPlane(planeEquation,vertices,b3Scalar(0.01)))
								{
									planeEquationsOut.push_back(planeEquation);
								}
						}
					}
					normalSign = b3Scalar(-1.);
				}
			
			}
		}
	}

}
Пример #10
0
void	MyComboBoxCallback(int comboId, const char* item)
{
	//printf("comboId = %d, item = %s\n",comboId, item);
	if (comboId==DEMO_SELECTION_COMBOBOX)
	{
		//find selected item
		for (int i=0;i<allNames.size();i++)
		{
			if (strcmp(item,allNames[i])==0)
			{
				selectDemo(i);
				saveCurrentSettings(sCurrentDemoIndex,startFileName);
				break;
			}
		}
	}

}
Пример #11
0
void	MyComboBoxCallback(int comboId, const char* item)
{
	int numDemos = demoNames.size();
	for (int i=0;i<numDemos;i++)
	{
		if (!strcmp(demoNames[i],item))
		{
			if (selectedDemo != i)
			{
				gReset = true;
				selectedDemo = i;
				printf("selected demo %s!\n", item);
			}
		}
	}


}
Пример #12
0
///todo: add some acceleration structure (AABBs, tree etc)
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, 
		const struct b3GpuNarrowPhaseInternalData* narrowphaseData,	class b3GpuBroadphaseInterface* broadphase)
{
	//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);

	B3_PROFILE("castRaysGPU");
	
	{
		B3_PROFILE("raycast copyFromHost");
		m_data->m_gpuRays->copyFromHost(rays);
		m_data->m_gpuHitResults->copyFromHost(hitResults);
		
	}
	
	int numRays = hitResults.size();
	{
		m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
		m_data->m_numRayRigidPairsPerRay->resize(numRays);
		
		m_data->m_gpuNumRayRigidPairs->resize(1);
		m_data->m_gpuRayRigidPairs->resize(numRays * 16);
	}
	
	//run kernel
	const bool USE_BRUTE_FORCE_RAYCAST = false;
	if(USE_BRUTE_FORCE_RAYCAST)
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
		launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
		
		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}
	else
	{
		m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() );

		m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
		
		int numRayRigidPairs = -1;
		m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
		if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() )
		{
			numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
			m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
		}
		
		m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs);	//Radix sort needs b3OpenCLArray::size() to be correct
		
		//Sort ray-rigid pairs by ray index
		{
			B3_PROFILE("sort ray-rigid pairs");
			m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) );
		}
		
		//detect start,count of each ray pair
		{
			B3_PROFILE("detect ray-rigid pair index ranges");
			
			{
				B3_PROFILE("reset ray-rigid pair index ranges");
				
				m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays);	//atomic_min used to find first index
				m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
				clFinish(m_data->m_q);
			}
			
			b3BufferInfoCL bufferInfo[] = 
			{
				b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ),
				
				b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
				b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() )
			};
			
			b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
			launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(numRayRigidPairs);
			
			launcher.launch1D(numRayRigidPairs);
			clFinish(m_data->m_q);
		}
		
		{
			B3_PROFILE("ray-rigid intersection");
			
			b3BufferInfoCL bufferInfo[] = 
			{
				b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ),
				b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ),
				b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
				b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ),
				
				b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ),
				
				b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() )
			};
			
			b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
			launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(numRays);
			
			launcher.launch1D(numRays);
			clFinish(m_data->m_q);
		}
	}
	
	

	//copy results
	{
		B3_PROFILE("raycast copyToHost");
		m_data->m_gpuHitResults->copyToHost(hitResults);
	}

}
Пример #13
0
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{

//	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);

	B3_PROFILE("castRaysHost");
	for (int r=0;r<rays.size();r++)
	{
		b3Vector3 rayFrom = rays[r].m_from;
		b3Vector3 rayTo = rays[r].m_to;
		float hitFraction = hitResults[r].m_hitFraction;

		int hitBodyIndex= -1;
		b3Vector3 hitNormal;

		for (int b=0;b<numBodies;b++)
		{
				
			const b3Vector3& pos = bodies[b].m_pos;
			const b3Quaternion& orn = bodies[b].m_quat;
			
			switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
			{
			case SHAPE_SPHERE:
				{
					b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
					if (sphere_intersect(pos,  radius, rayFrom, rayTo,hitFraction))
					{
						hitBodyIndex = b;
						b3Vector3 hitPoint;
						hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
						hitNormal = (hitPoint-bodies[b].m_pos).normalize();
					}
				}
			case SHAPE_CONVEX_HULL:
				{

					b3Transform convexWorldTransform;
					convexWorldTransform.setIdentity();
					convexWorldTransform.setOrigin(bodies[b].m_pos);
					convexWorldTransform.setRotation(bodies[b].m_quat);
					b3Transform convexWorld2Local = convexWorldTransform.inverse();

					b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
					b3Vector3 rayToLocal = convexWorld2Local(rayTo);
					
					
					int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
					const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
					if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal))
					{
						hitBodyIndex = b;
					}

					
					break;
				}
			default:
				{
					static bool once=true;
					if (once)
					{
						once=false;
						b3Warning("Raytest: unsupported shape type\n");
					}
				}
			}
		}
		if (hitBodyIndex>=0)
		{

			hitResults[r].m_hitFraction = hitFraction;
			hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
			hitResults[r].m_hitNormal = hitNormal;
			hitResults[r].m_hitBody = hitBodyIndex;
		}

	}
}
Пример #14
0
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index)
{
	B3_PROFILE("solveContacts");
	m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
	m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
	m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts);

	if (optionalSortContactsDeterminism)
	{
		if (!gCpuSortContactsDeterminism)
		{
			B3_PROFILE("GPU Sort contact constraints (determinism)");

			m_data->m_pBufContactOutGPUCopy->resize(numContacts);
			m_data->m_contactKeyValues->resize(numContacts);

			m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);

			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
						
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}

			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);

			{
				B3_PROFILE("gpu reorderContactKernel (determinism)");
                                
				b3Int4 cdata;
				cdata.x = numContacts;
                                
				//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
				//	, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
				b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst( cdata );
				launcher.launch1D( numContacts, 64 );
            }

		} else
		{
			B3_PROFILE("CPU Sort contact constraints (determinism)");
			b3AlignedObjectArray<b3Contact4> cpuConstraints;
			m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints);
			bool sort = true;
			if (sort)
			{
				cpuConstraints.quickSort(b3ContactCmp);

				for (int i=0;i<cpuConstraints.size();i++)
				{
					cpuConstraints[i].m_batchIdx = i;
				}
			}
			m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints);
			if (m_debugOutput==100)
			{
				for (int i=0;i<cpuConstraints.size();i++)
				{
					printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx);
				}
			}

			m_debugOutput++;
		}
	}
	



	int nContactOut = m_data->m_pBufContactOutGPU->size();

	bool useSolver = true;
	

    if (useSolver)
    {
        float dt=1./60.;
        b3ConstraintCfg csCfg( dt );
        csCfg.m_enableParallelSolve = true;
        csCfg.m_batchCellSize = 6;
        csCfg.m_staticIdx = static0Index;
        
        
        b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU;

        void* additionalData = 0;//m_data->m_frictionCGPU;
        const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU;
        b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
        int nContacts = nContactOut;
        
        
		int maxNumBatches = 0;
 
		if (!gUseLargeBatches)
        {
            
            if( m_data->m_solverGPU->m_contactBuffer2)
            {
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
            
            if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
            {
				m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
			
            //clFinish(m_data->m_queue);
            
            
            
			{
				B3_PROFILE("batching");
				//@todo: just reserve it, without copy of original contact (unless we use warmstarting)



				const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf;


				{

					//b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
					//b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );

					const int sortAlignment = 512; // todo. get this out of sort
					if( csCfg.m_enableParallelSolve )
					{


						int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment );

						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;


						if (!gCpuSetSortData)
						{	//	2. set cell idx
							B3_PROFILE("GPU set cell idx");
							struct CB
							{
								int m_nContacts;
								int m_staticIdx;
								float m_scale;
								b3Int4 m_nSplit;
							};

							b3Assert( sortSize%64 == 0 );
							CB cdata;
							cdata.m_nContacts = nContacts;
							cdata.m_staticIdx = csCfg.m_staticIdx;
							cdata.m_scale = 1.f/csCfg.m_batchCellSize;
							cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X;
							cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y;
							cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z;

							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);


							b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
							b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" );
							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
							launcher.setConst( cdata.m_nContacts );
							launcher.setConst( cdata.m_scale );
							launcher.setConst(cdata.m_nSplit);
							launcher.setConst(cdata.m_staticIdx);


							launcher.launch1D( sortSize, 64 );
						} else
						{
							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
							b3AlignedObjectArray<b3SortData> sortDataCPU;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU);

							b3AlignedObjectArray<b3Contact4> contactCPU;
							m_data->m_pBufContactOutGPU->copyToHost(contactCPU);
							b3AlignedObjectArray<b3RigidBodyData> bodiesCPU;
							bodyBuf->copyToHost(bodiesCPU);
							float scale = 1.f/csCfg.m_batchCellSize;
							b3Int4 nSplit;
							nSplit.x = B3_SOLVER_N_SPLIT_X;
							nSplit.y = B3_SOLVER_N_SPLIT_Y;
							nSplit.z = B3_SOLVER_N_SPLIT_Z;

							SetSortDataCPU(&contactCPU[0],  &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx);


							m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU);
						}



						if (!gCpuRadixSort)
						{	//	3. sort by cell idx
							B3_PROFILE("gpuRadixSort");
							//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
							//int sortBit = 32;
							//if( n <= 0xffff ) sortBit = 16;
							//if( n <= 0xff ) sortBit = 8;
							//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
							//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut);



						} else
						{
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							b3AlignedObjectArray<b3SortData> hostValues;
							keyValuesInOut.copyToHost(hostValues);
							hostValues.quickSort(sortfnc);
							keyValuesInOut.copyFromHost(hostValues);
						}


						if (gUseScanHost)
						{
							//	4. find entries
							B3_PROFILE("cpuBoundSearch");
							b3AlignedObjectArray<unsigned int> countsHost;
							countsNative->copyToHost(countsHost);

							b3AlignedObjectArray<b3SortData> sortDataHost;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);


							//m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);

							countsNative->copyFromHost(countsHost);


							//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
							//	B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );

							//unsigned int sum;
							//m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
							b3AlignedObjectArray<unsigned int> offsetsHost;
							offsetsHost.resize(offsetsNative->size());


							m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum );
							offsetsNative->copyFromHost(offsetsHost);

							//printf("sum = %d\n",sum);
						}  else
						{
							//	4. find entries
							B3_PROFILE("gpuBoundSearch");
							m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
						} 




						if (nContacts)
						{	//	5. sort constraints by cellIdx
							if (gReorderContactsOnCpu)
							{
								B3_PROFILE("cpu m_reorderContactKernel");
								b3AlignedObjectArray<b3SortData> sortDataHost;
								m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);
								b3AlignedObjectArray<b3Contact4> inContacts;
								b3AlignedObjectArray<b3Contact4> outContacts;
								m_data->m_pBufContactOutGPU->copyToHost(inContacts);
								outContacts.resize(inContacts.size());
								for (int i=0;i<nContacts;i++)
								{
									int srcIdx = sortDataHost[i].y;
									outContacts[i] = inContacts[srcIdx];
								}
								m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts);

								/*								"void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
								"{\n"
								"	int nContacts = cb.x;\n"
								"	int gIdx = GET_GLOBAL_IDX;\n"
								"	if( gIdx < nContacts )\n"
								"	{\n"
								"		int srcIdx = sortData[gIdx].y;\n"
								"		out[gIdx] = in[srcIdx];\n"
								"	}\n"
								"}\n"
								*/
							} else
							{
								B3_PROFILE("gpu m_reorderContactKernel");

								b3Int4 cdata;
								cdata.x = nContacts;

								b3BufferInfoCL bInfo[] = { 
									b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), 
									b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
									, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };

									b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
									launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
									launcher.setConst( cdata );
									launcher.launch1D( nContacts, 64 );
							}
						}




					}

				}

				//clFinish(m_data->m_queue);

				//				{
				//				b3AlignedObjectArray<unsigned int> histogram;
				//				m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
				//				printf(",,,\n");
				//				}


				if (nContacts)
				{

					if (gUseCpuCopyConstraints)
					{
						for (int i=0;i<nContacts;i++)
						{
							m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2);
							//							m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); 
							//						m_data->m_pBufContactOutGPU->getBufferCL() 
						}

					} else
					{
						B3_PROFILE("gpu m_copyConstraintKernel");
						b3Int4 cdata; cdata.x = nContacts;
						b3BufferInfoCL bInfo[] = { 
							b3BufferInfoCL(  m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), 
							b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) 
						};

						b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" );
						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
						launcher.setConst(  cdata );
						launcher.launch1D( nContacts, 64 );
						//we use the clFinish for proper benchmark/profile
						clFinish(m_data->m_queue);
					}
				}


				bool compareGPU = false;
				if (nContacts)
				{
					if (!gCpuBatchContacts)
					{
						B3_PROFILE("gpu batchContacts");
						maxNumBatches = 150;//250;
						m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
						clFinish(m_data->m_queue);
					} else
					{
						B3_PROFILE("cpu batchContacts");
						static b3AlignedObjectArray<b3Contact4> cpuContacts;
						b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
						{
							B3_PROFILE("copyToHost");
							contactsIn->copyToHost(cpuContacts);
						}
						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;

						b3AlignedObjectArray<unsigned int> nNativeHost;
						b3AlignedObjectArray<unsigned int> offsetsNativeHost;

						{
							B3_PROFILE("countsNative/offsetsNative copyToHost");
							countsNative->copyToHost(nNativeHost);
							offsetsNative->copyToHost(offsetsNativeHost);
						}


						int numNonzeroGrid=0;

						if (gUseLargeBatches)
						{
							m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
							int totalNumConstraints = cpuContacts.size();
							int simdWidth =numBodies+1;//-1;//64;//-1;//32;
							int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
							maxNumBatches = b3Max(numBatches,maxNumBatches);
							static int globalMaxBatch = 0;
							if (maxNumBatches>globalMaxBatch )
							{
								globalMaxBatch  = maxNumBatches;
								b3Printf("maxNumBatches = %d\n",maxNumBatches);
							}
								
						} else
						{
							m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES);
							B3_PROFILE("cpu batch grid");
							for(int i=0; i<B3_SOLVER_N_CELLS; i++)
							{
								int n = (nNativeHost)[i];
								int offset = (offsetsNativeHost)[i];
								if( n )
								{
									numNonzeroGrid++;
									int simdWidth =numBodies+1;//-1;//64;//-1;//32;
									int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]);	//	on GPU
									maxNumBatches = b3Max(numBatches,maxNumBatches);
									static int globalMaxBatch = 0;
									if (maxNumBatches>globalMaxBatch )
									{
										globalMaxBatch  = maxNumBatches;
										b3Printf("maxNumBatches = %d\n",maxNumBatches);
									}
									//we use the clFinish for proper benchmark/profile
									
								}
							}
							//clFinish(m_data->m_queue);
						}
						{
							B3_PROFILE("m_contactBuffer->copyFromHost");
							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
						}

					} 

				}


			


			} 


		} 


			//printf("maxNumBatches = %d\n", maxNumBatches);

		if (gUseLargeBatches)
		{
			if (nContacts)
			{
				B3_PROFILE("cpu batchContacts");
				static b3AlignedObjectArray<b3Contact4> cpuContacts;
//				b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
				{
					B3_PROFILE("copyToHost");
					m_data->m_pBufContactOutGPU->copyToHost(cpuContacts);
				}
				b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
				b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;



				int numNonzeroGrid=0;

				{
					m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
					int totalNumConstraints = cpuContacts.size();
					int simdWidth =numBodies+1;//-1;//64;//-1;//32;
					int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
					maxNumBatches = b3Max(numBatches,maxNumBatches);
					static int globalMaxBatch = 0;
					if (maxNumBatches>globalMaxBatch )
					{
						globalMaxBatch  = maxNumBatches;
						b3Printf("maxNumBatches = %d\n",maxNumBatches);
					}
								
				}
				{
					B3_PROFILE("m_contactBuffer->copyFromHost");
					m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
				}

			} 

		}

		if (nContacts)
		{
			B3_PROFILE("gpu convertToConstraints");
			m_data->m_solverGPU->convertToConstraints( bodyBuf, 
				shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
				contactConstraintOut, 
				additionalData, nContacts, 
				(b3SolverBase::ConstraintCfg&) csCfg );
			clFinish(m_data->m_queue);
		}


		if (1)
		{
			int numIter = 4;

			m_data->m_solverGPU->m_nIterations = numIter;//10
			if (!gCpuSolveConstraint)
			{
				B3_PROFILE("GPU solveContactConstraint");

				/*m_data->m_solverGPU->solveContactConstraint(
				m_data->m_bodyBufferGPU, 
				m_data->m_inertiaBufferGPU,
				m_data->m_contactCGPU,0,
				nContactOut ,
				maxNumBatches);
				*/

				//m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes);

				if (gUseLargeBatches)
				{
					solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);
				} else
				{
					solveContactConstraint(
						m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu);
				}
			}
			else
			{
				B3_PROFILE("Host solveContactConstraint");

				m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes);
			}
            
            
        }
        
        
#if 0
        if (0)
        {
            B3_PROFILE("read body velocities back to CPU");
            //read body updated linear/angular velocities back to CPU
            m_data->m_bodyBufferGPU->read(
                                                  m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
            adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL );
        }
#endif
        
    }

}
Пример #15
0
void b3CpuNarrowPhase::computeContacts(b3AlignedObjectArray<b3Int4>& pairs, b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, b3AlignedObjectArray<b3RigidBodyData>& bodies)
{
	int nPairs = pairs.size();
	int numContacts = 0;
	int maxContactCapacity = m_data->m_config.m_maxContactCapacity;
	m_data->m_contacts.resize(maxContactCapacity);

	for (int i = 0; i < nPairs; i++)
	{
		int bodyIndexA = pairs[i].x;
		int bodyIndexB = pairs[i].y;
		int collidableIndexA = bodies[bodyIndexA].m_collidableIdx;
		int collidableIndexB = bodies[bodyIndexB].m_collidableIdx;

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_SPHERE &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
		{
			//			computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0],
			//				&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
		}

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_SPHERE)
		{
			//			computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
			//				&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
			//printf("convex-sphere\n");
		}

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_PLANE)
		{
			//			computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
			//			&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
			//			printf("convex-plane\n");
		}

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_PLANE &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
		{
			//			computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0],
			//			&m_data->m_collidablesCPU[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
			//			printf("plane-convex\n");
		}

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
		{
			//			computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
			//			&m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0],
			//			nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU);
			//			printf("convex-plane\n");
		}

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_PLANE)
		{
			//			computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&bodies[0],
			//			&m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
			//			printf("convex-plane\n");
		}

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_PLANE &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)
		{
			//			computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&bodies[0],
			//			&m_data->m_collidablesCPU[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity);
			//			printf("plane-convex\n");
		}

		if (m_data->m_collidablesCPU[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&
			m_data->m_collidablesCPU[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)
		{
			//printf("pairs[i].z=%d\n",pairs[i].z);
			//int contactIndex = computeContactConvexConvex2(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,bodies,
			//		m_data->m_collidablesCPU,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts);
			int contactIndex = b3ContactConvexConvexSAT(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, bodies,
														m_data->m_collidablesCPU, m_data->m_convexPolyhedra, m_data->m_convexVertices, m_data->m_uniqueEdges, m_data->m_convexIndices, m_data->m_convexFaces, m_data->m_contacts, numContacts, maxContactCapacity);

			if (contactIndex >= 0)
			{
				pairs[i].z = contactIndex;
			}
			//			printf("plane-convex\n");
		}
	}

	m_data->m_contacts.resize(numContacts);
}
Пример #16
0
void b3RadixSort32CL::executeHost(b3AlignedObjectArray<b3SortData>& inout, int sortBits /* = 32 */)
{
	int n = inout.size();
	const int BITS_PER_PASS = 8;
	const int NUM_TABLES = (1<<BITS_PER_PASS);


	int tables[NUM_TABLES];
	int counter[NUM_TABLES];

	b3SortData* src = &inout[0];
	b3AlignedObjectArray<b3SortData> workbuffer;
	workbuffer.resize(inout.size());
	b3SortData* dst = &workbuffer[0];

	int count=0;
	for(int startBit=0; startBit<sortBits; startBit+=BITS_PER_PASS)
	{
		for(int i=0; i<NUM_TABLES; i++)
		{
			tables[i] = 0;
		}

		for(int i=0; i<n; i++)
		{
			int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1);
			tables[tableIdx]++;
		}
//#define TEST
#ifdef TEST
		printf("histogram size=%d\n",NUM_TABLES);
		for (int i=0;i<NUM_TABLES;i++)
		{
			if (tables[i]!=0)
			{
				printf("tables[%d]=%d]\n",i,tables[i]);
			}

		}
#endif //TEST
		//	prefix scan
		int sum = 0;
		for(int i=0; i<NUM_TABLES; i++)
		{
			int iData = tables[i];
			tables[i] = sum;
			sum += iData;
			counter[i] = 0;
		}

		//	distribute
		for(int i=0; i<n; i++)
		{
			int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1);
			
			dst[tables[tableIdx] + counter[tableIdx]] = src[i];
			counter[tableIdx] ++;
		}

		b3Swap( src, dst );
		count++;
	}

	if (count&1)
	{
		b3Assert(0);//need to copy 

	}
}
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal)
{
	//only create the batches once.
	//@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated
	B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");

	bool createBatches = m_gpuData->m_batchSizes.size() == 0;
	{
		if (createBatches)
		{
			m_gpuData->m_batchSizes.resize(0);

			{
				m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);

				B3_PROFILE("batch joints");
				b3Assert(batchConstraints.size() == numConstraints);
				int simdWidth = numConstraints + 1;
				int numBodies = m_tmpSolverBodyPool.size();
				sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies);

				m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
			}
		}
		else
		{
			/*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches;
			m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches);
			b3Assert(cpuCheckBatches.size()==batchConstraints.size());
			printf(".\n");
			*/
			//>copyFromHost(batchConstraints);
		}
		int maxIterations = infoGlobal.m_numIterations;

		bool useBatching = true;

		if (useBatching)
		{
			if (!useGpuSolveJointConstraintRows)
			{
				B3_PROFILE("copy to host");
				m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
				m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
				m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
				m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1);
				m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets);
				gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints);
			}

			for (int iteration = 0; iteration < maxIterations; iteration++)
			{
				int batchOffset = 0;
				int constraintOffset = 0;
				int numBatches = m_gpuData->m_batchSizes.size();
				for (int bb = 0; bb < numBatches; bb++)
				{
					int numConstraintsInBatch = m_gpuData->m_batchSizes[bb];

					if (useGpuSolveJointConstraintRows)
					{
						B3_PROFILE("solveJointConstraintRowsKernels");

						/*
						__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,
					  __global b3BatchConstraint* batchConstraints,
					  	__global b3SolverConstraint* rows,
						__global unsigned int* numConstraintRowsInfo1, 
						__global unsigned int* rowOffsets,
						__global b3GpuGenericConstraint* constraints,
						int batchOffset,
						int numConstraintsInBatch*/

						b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels");
						launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
						launcher.setBuffer(gpuConstraints1->getBufferCL());  //to detect disabled constraints
						launcher.setConst(batchOffset);
						launcher.setConst(numConstraintsInBatch);

						launcher.launch1D(numConstraintsInBatch);
					}
					else  //useGpu
					{
						for (int b = 0; b < numConstraintsInBatch; b++)
						{
							const b3BatchConstraint& c = batchConstraints[batchOffset + b];
							/*printf("-----------\n");
							printf("bb=%d\n",bb);
							printf("c.batchId = %d\n", c.m_batchId);
							*/
							b3Assert(c.m_batchId == bb);
							b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex];
							if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED)
							{
								int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex];
								int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex];

								for (int jj = 0; jj < numConstraintRows; jj++)
								{
									//
									b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj];
									//resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
									resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint);
								}
							}
						}
					}  //useGpu
					batchOffset += numConstraintsInBatch;
					constraintOffset += numConstraintsInBatch;
				}
			}  //for (int iteration...

			if (!useGpuSolveJointConstraintRows)
			{
				{
					B3_PROFILE("copy from host");
					m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool);
					m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
					m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool);
				}

				//B3_PROFILE("copy to host");
				//m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
			}
			//int sz = sizeof(b3GpuSolverBody);
			//printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz);
		}
		else
		{
			for (int iteration = 0; iteration < maxIterations; iteration++)
			{
				int numJoints = m_tmpSolverNonContactConstraintPool.size();
				for (int j = 0; j < numJoints; j++)
				{
					b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j];
					resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint);
				}

				if (!m_usePgs)
				{
					averageVelocities();
				}
			}
		}
	}
	clFinish(m_gpuData->m_queue);
	return 0.f;
}
Пример #18
0
int main(int argc, char* argv[])
{
	sOpenGLVerbose = false;
	
	float dt = 1./120.f;
	int width = 1024;
	int height=768;

	app = new SimpleOpenGL3App("AllBullet2Demos",width,height);
	app->m_instancingRenderer->setCameraDistance(13);
	app->m_instancingRenderer->setCameraPitch(0);
	app->m_instancingRenderer->setCameraTargetPosition(b3MakeVector3(0,0,0));
	app->m_window->setMouseMoveCallback(MyMouseMoveCallback);
	app->m_window->setMouseButtonCallback(MyMouseButtonCallback);
	app->m_window->setKeyboardCallback(MyKeyboardCallback);

	GLint err = glGetError();
    assert(err==GL_NO_ERROR);
	
	sth_stash* fontstash=app->getFontStash();
	gui = new GwenUserInterface;
	gui->init(width,height,fontstash,app->m_window->getRetinaScale());

	int numDemos = sizeof(allDemos)/sizeof(BulletDemoEntry);
	
	for (int i=0;i<numDemos;i++)
	{
		allNames.push_back(allDemos[i].m_name);
	}
		
	selectDemo(loadCurrentDemoEntry(startFileName));
	gui->registerComboBox(DEMO_SELECTION_COMBOBOX,allNames.size(),&allNames[0],sCurrentDemoIndex);
		
	//const char* names2[] = {"comboF", "comboG","comboH"};
	//gui->registerComboBox(2,3,&names2[0],0);

	gui->setComboBoxCallback(MyComboBoxCallback);


	do
	{

		GLint err = glGetError();
		assert(err==GL_NO_ERROR);
		app->m_instancingRenderer->init();
		app->m_instancingRenderer->updateCamera();
		
		app->drawGrid();
		
		if (0)
		{
		char bla[1024];
		static int frameCount = 0;
		frameCount++;
		sprintf(bla,"Simple test frame %d", frameCount);
		
		app->drawText(bla,10,10);
		}

		if (sCurrentDemo)
		{
			if (!pauseSimulation)
				sCurrentDemo->stepSimulation(1./60.f);
			sCurrentDemo->renderScene();
		}

		static int toggle = 1;
		if (1)
		{
		gui->draw(app->m_instancingRenderer->getScreenWidth(),app->m_instancingRenderer->getScreenHeight());
		}
		toggle=1-toggle;
		app->swapBuffer();
	} while (!app->m_window->requestedExit());

	selectDemo(0);
	delete gui;
	delete app;
	return 0;
}