TinyRendererVisualShapeConverterInternalData()
	:m_upAxis(2),
	m_swWidth(START_WIDTH),
	m_swHeight(START_HEIGHT),
	m_rgbColorBuffer(START_WIDTH,START_HEIGHT,TGAImage::RGB)
	{
	    m_depthBuffer.resize(m_swWidth*m_swHeight);
	    m_segmentationMaskBuffer.resize(m_swWidth*m_swHeight,-1);
	}
Ejemplo n.º 2
0
void	b3GeometryUtil::getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations , b3AlignedObjectArray<b3Vector3>& verticesOut )
{
	const int numbrushes = planeEquations.size();
	// brute force:
	for (int i=0;i<numbrushes;i++)
	{
		const b3Vector3& N1 = planeEquations[i];
		

		for (int j=i+1;j<numbrushes;j++)
		{
			const b3Vector3& N2 = planeEquations[j];
				
			for (int k=j+1;k<numbrushes;k++)
			{

				const b3Vector3& N3 = planeEquations[k];

				b3Vector3 n2n3; n2n3 = N2.cross(N3);
				b3Vector3 n3n1; n3n1 = N3.cross(N1);
				b3Vector3 n1n2; n1n2 = N1.cross(N2);
				
				if ( ( n2n3.length2() > b3Scalar(0.0001) ) &&
					 ( n3n1.length2() > b3Scalar(0.0001) ) &&
					 ( n1n2.length2() > b3Scalar(0.0001) ) )
				{
					//point P out of 3 plane equations:

					//	d1 ( N2 * N3 ) + d2 ( N3 * N1 ) + d3 ( N1 * N2 )  
					//P =  -------------------------------------------------------------------------  
					//   N1 . ( N2 * N3 )  


					b3Scalar quotient = (N1.dot(n2n3));
					if (b3Fabs(quotient) > b3Scalar(0.000001))
					{
						quotient = b3Scalar(-1.) / quotient;
						n2n3 *= N1[3];
						n3n1 *= N2[3];
						n1n2 *= N3[3];
						b3Vector3 potentialVertex = n2n3;
						potentialVertex += n3n1;
						potentialVertex += n1n2;
						potentialVertex *= quotient;

						//check if inside, and replace supportingVertexOut if needed
						if (isPointInsidePlanes(planeEquations,potentialVertex,b3Scalar(0.01)))
						{
							verticesOut.push_back(potentialVertex);
						}
					}
				}
			}
		}
	}
}
Ejemplo n.º 3
0
///todo: add some acceleration structure (AABBs, tree etc)
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{
	
	//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);

	B3_PROFILE("castRaysGPU");

	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
	b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);

	{
		B3_PROFILE("raycast copyFromHost");
		gpuRays.copyFromHost(rays);

	
		gpuHitResults.resize(hitResults.size());
		gpuHitResults.copyFromHost(hitResults);
	}


	//run kernel
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(gpuRays.getBufferCL());
		launcher.setBuffer(gpuHitResults.getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
		
		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}

	//copy results
	{
		B3_PROFILE("raycast copyToHost");
		gpuHitResults.copyToHost(hitResults);
	}

}
Ejemplo n.º 4
0
    virtual bool fragment(Vec3f bar, TGAColor &color) {
		//B3_PROFILE("fragment");
        Vec4f p = m_viewportMat*(varying_tri_light_view*bar);
        float depth = p[2];
        p = p/p[3];
        
		float index_x = b3Max(float(0.0), b3Min(float(m_width-1), p[0]));
		float index_y = b3Max(float(0.0), b3Min(float(m_height-1), p[1]));
		int idx = int(index_x) + int(index_y)*m_width; // index in the shadowbuffer array
        float shadow = 0.8+0.2*(m_shadowBuffer->at(idx)<-depth+0.05); // magic coeff to avoid z-fighting
        
        Vec3f bn = (varying_nrm*bar).normalize();
        Vec2f uv = varying_uv*bar;
        
        Vec3f reflection_direction = (bn * (bn * m_light_dir_local * 2.f) - m_light_dir_local).normalize();
        float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv));
        float diffuse = b3Max(0.f, bn * m_light_dir_local);
        
        color = m_model->diffuse(uv);
        color[0] *= m_colorRGBA[0];
        color[1] *= m_colorRGBA[1];
        color[2] *= m_colorRGBA[2];
        color[3] *= m_colorRGBA[3];
        
        for (int i = 0; i < 3; ++i)
        {
            color[i] = b3Min(int(m_ambient_coefficient*color[i] + shadow*(m_diffuse_coefficient*diffuse+m_specular_coefficient*specular)*color[i]*m_light_color[i]), 255);
        }
        
        return false;

    }
Ejemplo n.º 5
0
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables)
{

//	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);

	B3_PROFILE("castRaysHost");

	for (int r=0;r<rays.size();r++)
	{
		b3Vector3 rayFrom = rays[r].m_from;
		b3Vector3 rayTo = rays[r].m_to;

		//if there is a hit, color the pixels
		bool hits  = false;

		for (int b=0;b<numBodies && !hits;b++)
		{
				
			const b3Vector3& pos = bodies[b].m_pos;
			const b3Quaternion& orn = bodies[b].m_quat;
			
			b3Scalar radius = 1;

			if (sphere_intersect(pos,  radius, rayFrom, rayTo))
				hits = true;
		}
		if (hits)
			hitResults[r].m_hitFraction = 0.f;

	}
}
Ejemplo n.º 6
0
void	b3GeometryUtil::getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut )
{
		const int numvertices = vertices.size();
	// brute force:
	for (int i=0;i<numvertices;i++)
	{
		const b3Vector3& N1 = vertices[i];
		

		for (int j=i+1;j<numvertices;j++)
		{
			const b3Vector3& N2 = vertices[j];
				
			for (int k=j+1;k<numvertices;k++)
			{

				const b3Vector3& N3 = vertices[k];

				b3Vector3 planeEquation,edge0,edge1;
				edge0 = N2-N1;
				edge1 = N3-N1;
				b3Scalar normalSign = b3Scalar(1.);
				for (int ww=0;ww<2;ww++)
				{
					planeEquation = normalSign * edge0.cross(edge1);
					if (planeEquation.length2() > b3Scalar(0.0001))
					{
						planeEquation.normalize();
						if (notExist(planeEquation,planeEquationsOut))
						{
							planeEquation[3] = -planeEquation.dot(N1);
							
								//check if inside, and replace supportingVertexOut if needed
								if (areVerticesBehindPlane(planeEquation,vertices,b3Scalar(0.01)))
								{
									planeEquationsOut.push_back(planeEquation);
								}
						}
					}
					normalSign = b3Scalar(-1.);
				}
			
			}
		}
	}

}
	TinyRendererGUIHelper( int swWidth, int swHeight) 
		: m_upAxis(1),
	 m_swWidth(swWidth),
    m_swHeight(swHeight),
	m_rgbColorBuffer(swWidth,swHeight,TGAImage::RGB),
	m_colObjUniqueIndex(0)
	{
		m_depthBuffer.resize(swWidth*swHeight);
	}
Ejemplo n.º 8
0
static bool clipTriangleAgainstNearplane(const mat<4,3,float>& triangleIn, b3AlignedObjectArray<mat<4,3,float> >& clippedTrianglesOut)
{


	//discard triangle if all vertices are behind near-plane
	if (triangleIn[3][0]<0 && triangleIn[3][1] <0 && triangleIn[3][2] <0)
	{
		return true;
	}

	//accept triangle if all vertices are in front of the near-plane
	if (triangleIn[3][0]>=0 && triangleIn[3][1] >=0 && triangleIn[3][2] >=0)
	{
		clippedTrianglesOut.push_back(triangleIn);
		return false;
	}

	Vec4f vtxCache[5];

	b3AlignedObjectArray<Vec4f> vertices;
	vertices.initializeFromBuffer(vtxCache,0,5);
	clipEdge(triangleIn,0,1,vertices);
	clipEdge(triangleIn,1,2,vertices);
	clipEdge(triangleIn,2,0,vertices);

	if (vertices.size()<3)
		return true;
	
	if (equals(vertices[0],vertices[vertices.size()-1]))
	{
		vertices.pop_back();
	}

	//create a fan of triangles
	for (int i=1;i<vertices.size()-1;i++)
	{
		mat<4,3,float>& vtx = clippedTrianglesOut.expand();
		vtx.set_col(0,vertices[0]);
		vtx.set_col(1,vertices[i]);
		vtx.set_col(2,vertices[i+1]);
	}
	return true;
}
Ejemplo n.º 9
0
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables)
{
	B3_PROFILE("castRaysGPU");

	b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q);
	gpuRays.copyFromHost(rays);

	b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q);
	gpuHitResults.resize(hitResults.size());

	b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q);
	gpuBodies.resize(numBodies);
	gpuBodies.copyFromHostPointer(bodies,numBodies);

	b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q);
	gpuCollidables.resize(numCollidables);
	gpuCollidables.copyFromHostPointer(collidables,numCollidables);


	//run kernel
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel);
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(gpuRays.getBufferCL());
		launcher.setBuffer(gpuHitResults.getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(gpuBodies.getBufferCL());
		launcher.setBuffer(gpuCollidables.getBufferCL());

		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}

	//copy results
	gpuHitResults.copyToHost(hitResults);

}
Ejemplo n.º 10
0
	ParticleInternalData()
		:
		m_clPositionBuffer(0),
		m_velocitiesGPU(0),
		m_simParamGPU(0),
		m_updatePositionsKernel(0),
		m_updatePositionsKernel2(0),
		m_updateAabbsKernel(0),
		m_collideParticlesKernel(0)
	{
		m_simParamCPU.resize(1);
	}
	SW_And_OpenGLGuiHelper(CommonGraphicsApp* glApp, bool useOpenGL2, int swWidth, int swHeight, GLPrimitiveRenderer* primRenderer)
		: OpenGLGuiHelper(glApp, useOpenGL2),
		  m_swWidth(swWidth),
		  m_swHeight(swHeight),
		  m_rgbColorBuffer(swWidth, swHeight, TGAImage::RGB),
		  m_primRenderer(primRenderer)
	{
		m_depthBuffer.resize(swWidth * swHeight);
		CommonRenderInterface* render = getRenderInterface();
		m_image = new unsigned char[m_swWidth * m_swHeight * 4];

		m_textureHandle = render->registerTexture(m_image, m_swWidth, m_swHeight);
	}
Ejemplo n.º 12
0
bool notExist(const b3Vector3& planeEquation,const b3AlignedObjectArray<b3Vector3>& planeEquations)
{
	int numbrushes = planeEquations.size();
	for (int i=0;i<numbrushes;i++)
	{
		const b3Vector3& N1 = planeEquations[i];
		if (planeEquation.dot(N1) > b3Scalar(0.999))
		{
			return false;
		} 
	}
	return true;
}
Ejemplo n.º 13
0
static void clipEdge(const mat<4, 3, float>& triangleIn, int vertexIndexA, int vertexIndexB, b3AlignedObjectArray<Vec4f>& vertices)
{
	Vec4f v0New = triangleIn.col(vertexIndexA);
	Vec4f v1New = triangleIn.col(vertexIndexB);

	bool v0Inside = v0New[3] > 0.f && v0New[2] > -v0New[3];
	bool v1Inside = v1New[3] > 0.f && v1New[2] > -v1New[3];

	if (v0Inside && v1Inside)
	{
	}
	else if (v0Inside || v1Inside)
	{
		float d0 = v0New[2] + v0New[3];
		float d1 = v1New[2] + v1New[3];
		float factor = 1.0 / (d1 - d0);
		Vec4f newVertex = (v0New * d1 - v1New * d0) * factor;
		if (v0Inside)
		{
			v1New = newVertex;
		}
		else
		{
			v0New = newVertex;
		}
	}
	else
	{
		return;
	}

	if (vertices.size() == 0 || !(equals(vertices[vertices.size() - 1], v0New)))
	{
		vertices.push_back(v0New);
	}

	vertices.push_back(v1New);
}
Ejemplo n.º 14
0
bool	b3GeometryUtil::areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar	margin)
{
	int numvertices = vertices.size();
	for (int i=0;i<numvertices;i++)
	{
		const b3Vector3& N1 = vertices[i];
		b3Scalar dist = b3Scalar(planeNormal.dot(N1))+b3Scalar(planeNormal[3])-margin;
		if (dist>b3Scalar(0.))
		{
			return false;
		}
	}
	return true;
}
Ejemplo n.º 15
0
	TinyRendererSetupInternalData(int width, int height)
		:m_roll(0),
		m_pitch(0),
		m_yaw(0),

		m_width(width),
		m_height(height),
		m_rgbColorBuffer(width,height,TGAImage::RGB),
		m_textureHandle(0),
		m_animateRenderer(0)
	{
		m_depthBuffer.resize(m_width*m_height);

    }
Ejemplo n.º 16
0
bool	b3GeometryUtil::isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar	margin)
{
	int numbrushes = planeEquations.size();
	for (int i=0;i<numbrushes;i++)
	{
		const b3Vector3& N1 = planeEquations[i];
		b3Scalar dist = b3Scalar(N1.dot(point))+b3Scalar(N1[3])-margin;
		if (dist>b3Scalar(0.))
		{
			return false;
		}
	}
	return true;
		
}
Ejemplo n.º 17
0
void	MyComboBoxCallback(int comboId, const char* item)
{
	//printf("comboId = %d, item = %s\n",comboId, item);
	if (comboId==DEMO_SELECTION_COMBOBOX)
	{
		//find selected item
		for (int i=0;i<allNames.size();i++)
		{
			if (strcmp(item,allNames[i])==0)
			{
				selectDemo(i);
				saveCurrentSettings(sCurrentDemoIndex,startFileName);
				break;
			}
		}
	}

}
Ejemplo n.º 18
0
void	MyComboBoxCallback(int comboId, const char* item)
{
	int numDemos = demoNames.size();
	for (int i=0;i<numDemos;i++)
	{
		if (!strcmp(demoNames[i],item))
		{
			if (selectedDemo != i)
			{
				gReset = true;
				selectedDemo = i;
				printf("selected demo %s!\n", item);
			}
		}
	}


}
Ejemplo n.º 19
0
    RenderInstancingDemo(CommonGraphicsApp* app)
    :m_app(app),
    m_x(0),
    m_y(0),
	m_z(0)
    {
		m_app->setUpAxis(2);
        
		 {
             b3Vector3 extents=b3MakeVector3(100,100,100);
             extents[m_app->getUpAxis()]=1;
             
			 int xres = 20;
			 int yres = 20;
			 
			 b3Vector4 color0=b3MakeVector4(0.1, 0.1, 0.1,1);
			 b3Vector4 color1=b3MakeVector4(0.6, 0.6, 0.6,1);
            m_app->registerGrid(xres, yres, color0, color1);
        }
		 
        {
            int boxId = m_app->registerCubeShape(0.1,0.1,0.1);
           
            
            
            for (int i=-numCubesX/2;i<numCubesX/2;i++)
            {
                for (int j = -numCubesY/2;j<numCubesY/2;j++)
                {
                    b3Vector3 pos=b3MakeVector3(i,j,j);
                    pos[app->getUpAxis()] = 1;
                    b3Quaternion orn(0,0,0,1);
                    b3Vector4 color=b3MakeVector4(0.3,0.3,0.3,1);
                    b3Vector3 scaling=b3MakeVector3(1,1,1);
                   int instanceId = m_app->m_renderer->registerGraphicsInstance(boxId,pos,orn,color,scaling);
				   m_movingInstances.push_back(instanceId);
                }
            }
        }

		 m_app->m_renderer->writeTransforms();
    }
Ejemplo n.º 20
0
int main(int argc, char* argv[])
{

	int sz = sizeof(b3Generic6DofConstraint);
	int sz2 = sizeof(b3Point2PointConstraint);
	int sz3 = sizeof(b3TypedConstraint);
	int sz4 = sizeof(b3TranslationalLimitMotor);
	int sz5 = sizeof(b3RotationalLimitMotor);
	int sz6 = sizeof(b3Transform);

	//b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache");


	b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile);
	b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile);


	b3SetCustomPrintfFunc(myprintf);
	b3Vector3 test=b3MakeVector3(1,2,3);
	test.x = 1;
	test.y = 4;

    printf("main start");

	b3CommandLineArgs args(argc,argv);
	ParticleDemo::ConstructionInfo ci;

	if (args.CheckCmdLineFlag("help"))
	{
		Usage();
		return 0;
	}

	selectedDemo =  loadCurrentDemoEntry(sStartFileName);


	args.GetCmdLineArgument("selected_demo",selectedDemo);


	if (args.CheckCmdLineFlag("new_batching"))
	{
		useNewBatchingKernel = true;
	}
	bool benchmark=args.CheckCmdLineFlag("benchmark");
	args.GetCmdLineArgument("max_framecount",maxFrameCount);

	args.GetCmdLineArgument("shadowmap_size",shadowMapWorldSize);

	args.GetCmdLineArgument("shadowmap_resolution",shadowMapWidth);
	shadowMapHeight=shadowMapWidth;
	if (args.CheckCmdLineFlag("disable_shadowmap"))
	{
		useShadowMap = false;
	}

	args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName);

	gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch");

	dump_timings=args.CheckCmdLineFlag("dump_timings");
	ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl");
	ci.m_useConcaveMesh = true;//args.CheckCmdLineFlag("use_concave_mesh");
	if (ci.m_useConcaveMesh)
	{
		enableExperimentalCpuConcaveCollision = true;
	}
	ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes");
	args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex);
	args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex);
	gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu");
	gUseLargeBatches = args.CheckCmdLineFlag("use_large_batches");

	gUseJacobi = args.CheckCmdLineFlag("use_jacobi");
	gUseDbvt = args.CheckCmdLineFlag("use_dbvt");
	gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats");
	gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu");
	gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu");
	gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu");
	gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu");
	useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid");




	args.GetCmdLineArgument("x_dim", ci.arraySizeX);
	args.GetCmdLineArgument("y_dim", ci.arraySizeY);
	args.GetCmdLineArgument("z_dim", ci.arraySizeZ);
	args.GetCmdLineArgument("x_gap", ci.gapX);
	args.GetCmdLineArgument("y_gap", ci.gapY);
	args.GetCmdLineArgument("z_gap", ci.gapZ);

	gPause = args.CheckCmdLineFlag("paused");

	gDebugForceLoadingFromSource = args.CheckCmdLineFlag("load_cl_kernels_from_disk");
	gDebugSkipLoadingBinary = args.CheckCmdLineFlag("disable_cached_cl_kernels");


#ifndef B3_NO_PROFILE
	b3ProfileManager::Reset();
#endif //B3_NO_PROFILE


	window = new b3gDefaultOpenGLWindow();

	b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight);

	window->createWindow(wci);
	window->setResizeCallback(MyResizeCallback);
	window->setMouseMoveCallback(MyMouseMoveCallback);
	window->setMouseButtonCallback(MyMouseButtonCallback);
	window->setKeyboardCallback(MyKeyboardCallback);

	window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org");
	printf("-----------------------------------------------------\n");




#ifndef __APPLE__
	glewInit();
#endif

	gui = new GwenUserInterface();

    printf("started GwenUserInterface");


	GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight);

	stash = initFont(&prim);


	if (gui)
	{
		gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale());

		printf("init fonts");


		gui->setToggleButtonCallback(MyButtonCallback);

		gui->registerToggleButton(MYPAUSE,"Pause");
		gui->registerToggleButton(MYPROFILE,"Profile");
		gui->registerToggleButton(MYRESET,"Reset");

		int numItems = sizeof(allDemos)/sizeof(ParticleDemo::CreateFunc*);
		demoNames.clear();
		for (int i=0;i<numItems;i++)
		{
			GpuDemo* demo = allDemos[i]();
			demoNames.push_back(demo->getName());
			delete demo;
		}

		gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0],selectedDemo);
		gui->setComboBoxCallback(MyComboBoxCallback);
	}



	do
	{
		bool syncOnly = false;
		gReset = false;

			{
		GLint err;
		glEnable(GL_BLEND);
		err = glGetError();
		b3Assert(err==GL_NO_ERROR);
		glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
		glDisable(GL_DEPTH_TEST);
		err = glGetError();
		b3Assert(err==GL_NO_ERROR);
		window->startRendering();
		glClearColor(1,1,1,1);
		glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
		glEnable(GL_DEPTH_TEST);

		sth_begin_draw(stash);
		//sth_draw_text(stash, droidRegular,12.f, dx, dy-50, "How does this OpenGL True Type font look? ", &dx,width,height);
		int spacing = 0;//g_OpenGLHeight;
		float sx,sy,dx,dy,lh;
		sx = 0;
		sy = g_OpenGLHeight;
		dx = sx; dy = sy;
		//if (1)
		const char* msg[] = {"Please wait, initializing the OpenCL demo",
			"Please make sure to run the demo on a high-end discrete GPU with OpenCL support",
			"The first time it can take a bit longer to compile the OpenCL kernels.",
			"Check the console if it takes longer than 1 minute or if a demos has issues.",
			"Please share the full commandline output when reporting issues:",
			"App_Bullet3_OpenCL_Demos_* >> error.log",

			"",
			"",
#ifdef _DEBUG
			"Some of the demos load a large .obj file,",
			"please use an optimized build of this app for faster parsing",

			"",
			"",
#endif
			"You can press F1 to create a single screenshot,",
			"or press F2 toggle screenshot (useful to create movies)",
			"",
			"",
			"There are various command-line options such as --benchmark",
			"See http://github.com/erwincoumans/bullet3 for more information"
		};
		int fontSize = 68;

		int nummsg = sizeof(msg)/sizeof(const char*);
		for (int i=0;i<nummsg;i++)
		{
			char txt[512];
			sprintf(txt,"%s",msg[i]);
				//sth_draw_text(stash, droidRegular,i, 10, dy-spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight);
				sth_draw_text(stash, droidRegular,fontSize, 10, spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight);
				spacing+=fontSize;
			fontSize = 32;
		}

		sth_end_draw(stash);
		sth_flush_draw(stash);
		window->endRendering();
	}



	static bool once=true;


	//glClearColor(0.3f, 0.3f, 0.3f, 1.0f);
	glClearColor(1,1,1,1);
	glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);

	window->setWheelCallback(b3DefaultWheelCallback);




	{
		GpuDemo* demo = allDemos[selectedDemo]();
		sDemo = demo;
//		demo->myinit();
		bool useGpu = false;


		//int maxObjectCapacity=128*1024;
		int maxObjectCapacity=1024*1024;
		maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10);

		{
		ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity);//render.getInstancingRenderer();
		ci.m_window = window;
		ci.m_gui = gui;
		ci.m_instancingRenderer->init();
		ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
		ci.m_instancingRenderer->InitShaders();
		ci.m_primRenderer = &prim;
//		render.init();
		}

		{
			demo->initPhysics(ci);
		}





		printf("-----------------------------------------------------\n");

		FILE* csvFile = 0;
		FILE* detailsFile = 0;

		if (benchmark)
		{

			char prefixFileName[1024];
			char csvFileName[1024];
			char detailsFileName[1024];

			b3OpenCLDeviceInfo info;
			b3OpenCLUtils::getDeviceInfo(demo->getInternalData()->m_clDevice,&info);

			//todo: move this time stuff into the Platform/Window class
#ifdef _WIN32
			SYSTEMTIME time;
			GetLocalTime(&time);
			char buf[1024];
			DWORD dwCompNameLen = 1024;
			if (0 != GetComputerName(buf, &dwCompNameLen))
			{
				printf("%s", buf);
			} else
			{
				printf("unknown", buf);
			}

			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond);

#else
			timeval now;
			gettimeofday(&now,0);

			struct tm* ptm;
			ptm = localtime (&now.tv_sec);
			char buf[1024];
#ifdef __APPLE__
			sprintf(buf,"MacOSX");
#else
			sprintf(buf,"Unix");
#endif
			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,
					ptm->tm_mday,
					ptm->tm_mon+1,
					ptm->tm_year+1900,
					ptm->tm_hour,
					ptm->tm_min,
					ptm->tm_sec);

#endif

			sprintf(csvFileName,"%s.csv",prefixFileName);
			sprintf(detailsFileName,"%s.txt",prefixFileName);
			printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName);

			//GetSystemTime(&time2);

			csvFile=fopen(csvFileName,"w");
			detailsFile = fopen(detailsFileName,"w");
			if (detailsFile)
				defaultOutput = detailsFile;

			//if (f)
			//	fprintf(f,"%s (%dx%dx%d=%d),\n",  g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ);
		}


		fprintf(defaultOutput,"Demo settings:\n");
		fprintf(defaultOutput,"  SelectedDemo=%d, demoname = %s\n", selectedDemo, demo->getName());
		fprintf(defaultOutput,"  x_dim=%d, y_dim=%d, z_dim=%d\n",ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ);
		fprintf(defaultOutput,"  x_gap=%f, y_gap=%f, z_gap=%f\n",ci.gapX,ci.gapY,ci.gapZ);
		fprintf(defaultOutput,"\nOpenCL settings:\n");
		fprintf(defaultOutput,"  Preferred cl_device index %d\n", ci.preferredOpenCLDeviceIndex);
		fprintf(defaultOutput,"  Preferred cl_platform index%d\n", ci.preferredOpenCLPlatformIndex);
		fprintf(defaultOutput,"\n");

		if (demo->getInternalData()->m_platformId)
		{
			b3OpenCLUtils::printPlatformInfo( demo->getInternalData()->m_platformId);
			fprintf(defaultOutput,"\n");
			b3OpenCLUtils::printDeviceInfo( demo->getInternalData()->m_clDevice);
			fprintf(defaultOutput,"\n");
		}
		do
		{


			GLint err = glGetError();
			assert(err==GL_NO_ERROR);


			if (exportFrame || exportMovie)
			{

				if (!renderTexture)
				{
					renderTexture = new GLRenderToTexture();
					GLuint renderTextureId;
					glGenTextures(1, &renderTextureId);

					// "Bind" the newly created texture : all future texture functions will modify this texture
					glBindTexture(GL_TEXTURE_2D, renderTextureId);

					// Give an empty image to OpenGL ( the last "0" )
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0);
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);
					glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);

					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

					renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR);
				}

				bool result = renderTexture->enable();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			b3ProfileManager::Reset();
			b3ProfileManager::Increment_Frame_Counter();

//			render.reshape(g_OpenGLWidth,g_OpenGLHeight);
			ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
			prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			window->startRendering();

			err = glGetError();
			assert(err==GL_NO_ERROR);

			glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
			glEnable(GL_DEPTH_TEST);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			if (!gPause)
			{
				B3_PROFILE("clientMoveAndDisplay");

				demo->clientMoveAndDisplay();
			}
			else
			{

			}

			{
				B3_PROFILE("renderScene");
				demo->renderScene();
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			/*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects())
			{
				B3_PROFILE("renderPhysicsWorld");
				b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray();
				b3CollisionObject** colObjArray = &arr[0];

				render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly);
				syncOnly = true;

			}
			*/


			if (exportFrame || exportMovie)
			{

				char fileName[1024];
				sprintf(fileName,"screenShot%d.png",frameIndex++);
				writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName);
				exportFrame = false;
				renderTexture->disable();
			}


			{
				B3_PROFILE("gui->draw");
				if (gui && gDrawGui)
					gui->draw(g_OpenGLWidth,g_OpenGLHeight);
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			{
				B3_PROFILE("window->endRendering");
				window->endRendering();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			{
				B3_PROFILE("glFinish");
			}



			if (dump_timings)
			{
				b3ProfileManager::dumpAll(stdout);
			}

			if (csvFile)
			{
				static int frameCount=0;

				if (frameCount>0)
				{
					DumpSimulationTime(csvFile);
					if (detailsFile)
					{
							fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount);
							b3ProfileManager::dumpAll(detailsFile);
					}
				}

				if (frameCount>=maxFrameCount)
					window->setRequestExit();
				frameCount++;
			}


			if (gStep)
				gPause=true;

		} while (!window->requestedExit() && !gReset);


		demo->exitPhysics();
		b3ProfileManager::CleanupMemory();
		delete ci.m_instancingRenderer;

		delete demo;
		sDemo = 0;

		if (detailsFile)
		{
			fclose(detailsFile);
			detailsFile=0;
		}
		if (csvFile)
		{
			fclose(csvFile);
			csvFile=0;
		}
	}



	} while (gReset);


	if (gui)
		gui->setComboBoxCallback(0);

	{



		delete gui;
		gui=0;

		exitFont();


		window->closeWindow();
		delete window;
		window = 0;

	}

	return 0;
}
inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3(b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies)
{
	//int sz = sizeof(b3BatchConstraint);

	B3_PROFILE("sortConstraintByBatch3");

	static int maxSwaps = 0;
	int numSwaps = 0;

	curUsed.resize(2 * simdWidth);

	static int maxNumConstraints = 0;
	if (maxNumConstraints < numConstraints)
	{
		maxNumConstraints = numConstraints;
		//printf("maxNumConstraints  = %d\n",maxNumConstraints );
	}

	int numUsedArray = numBodies / 32 + 1;
	bodyUsed.resize(numUsedArray);

	for (int q = 0; q < numUsedArray; q++)
		bodyUsed[q] = 0;

	int curBodyUsed = 0;

	int numIter = 0;

#if defined(_DEBUG)
	for (int i = 0; i < numConstraints; i++)
		cs[i].m_batchId = -1;
#endif

	int numValidConstraints = 0;
	//	int unprocessedConstraintIndex = 0;

	int batchIdx = 0;

	{
		B3_PROFILE("cpu batch innerloop");

		while (numValidConstraints < numConstraints)
		{
			numIter++;
			int nCurrentBatch = 0;
			//	clear flag
			for (int i = 0; i < curBodyUsed; i++)
				bodyUsed[curUsed[i] / 32] = 0;

			curBodyUsed = 0;

			for (int i = numValidConstraints; i < numConstraints; i++)
			{
				int idx = i;
				b3Assert(idx < numConstraints);
				//	check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx;
				bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx;
				int aUnavailable = 0;
				int bUnavailable = 0;
				if (!aIsStatic)
				{
					aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31));
				}
				if (!aUnavailable)
					if (!bIsStatic)
					{
						bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 31));
					}

				if (aUnavailable == 0 && bUnavailable == 0)  // ok
				{
					if (!aIsStatic)
					{
						bodyUsed[bodyA / 32] |= (1 << (bodyA & 31));
						curUsed[curBodyUsed++] = bodyA;
					}
					if (!bIsStatic)
					{
						bodyUsed[bodyB / 32] |= (1 << (bodyB & 31));
						curUsed[curBodyUsed++] = bodyB;
					}

					cs[idx].m_batchId = batchIdx;

					if (i != numValidConstraints)
					{
						b3Swap(cs[i], cs[numValidConstraints]);
						numSwaps++;
					}

					numValidConstraints++;
					{
						nCurrentBatch++;
						if (nCurrentBatch == simdWidth)
						{
							nCurrentBatch = 0;
							for (int i = 0; i < curBodyUsed; i++)
								bodyUsed[curUsed[i] / 32] = 0;
							curBodyUsed = 0;
						}
					}
				}
			}
			m_gpuData->m_batchSizes.push_back(nCurrentBatch);
			batchIdx++;
		}
	}

#if defined(_DEBUG)
	//		debugPrintf( "nBatches: %d\n", batchIdx );
	for (int i = 0; i < numConstraints; i++)
	{
		b3Assert(cs[i].m_batchId != -1);
	}
#endif

	if (maxSwaps < numSwaps)
	{
		maxSwaps = numSwaps;
		//printf("maxSwaps = %d\n", maxSwaps);
	}

	return batchIdx;
}
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal)
{
	//only create the batches once.
	//@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated
	B3_PROFILE("GpuSolveGroupCacheFriendlyIterations");

	bool createBatches = m_gpuData->m_batchSizes.size() == 0;
	{
		if (createBatches)
		{
			m_gpuData->m_batchSizes.resize(0);

			{
				m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);

				B3_PROFILE("batch joints");
				b3Assert(batchConstraints.size() == numConstraints);
				int simdWidth = numConstraints + 1;
				int numBodies = m_tmpSolverBodyPool.size();
				sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies);

				m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
			}
		}
		else
		{
			/*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches;
			m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches);
			b3Assert(cpuCheckBatches.size()==batchConstraints.size());
			printf(".\n");
			*/
			//>copyFromHost(batchConstraints);
		}
		int maxIterations = infoGlobal.m_numIterations;

		bool useBatching = true;

		if (useBatching)
		{
			if (!useGpuSolveJointConstraintRows)
			{
				B3_PROFILE("copy to host");
				m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
				m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
				m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
				m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1);
				m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets);
				gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints);
			}

			for (int iteration = 0; iteration < maxIterations; iteration++)
			{
				int batchOffset = 0;
				int constraintOffset = 0;
				int numBatches = m_gpuData->m_batchSizes.size();
				for (int bb = 0; bb < numBatches; bb++)
				{
					int numConstraintsInBatch = m_gpuData->m_batchSizes[bb];

					if (useGpuSolveJointConstraintRows)
					{
						B3_PROFILE("solveJointConstraintRowsKernels");

						/*
						__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,
					  __global b3BatchConstraint* batchConstraints,
					  	__global b3SolverConstraint* rows,
						__global unsigned int* numConstraintRowsInfo1, 
						__global unsigned int* rowOffsets,
						__global b3GpuGenericConstraint* constraints,
						int batchOffset,
						int numConstraintsInBatch*/

						b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels");
						launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
						launcher.setBuffer(gpuConstraints1->getBufferCL());  //to detect disabled constraints
						launcher.setConst(batchOffset);
						launcher.setConst(numConstraintsInBatch);

						launcher.launch1D(numConstraintsInBatch);
					}
					else  //useGpu
					{
						for (int b = 0; b < numConstraintsInBatch; b++)
						{
							const b3BatchConstraint& c = batchConstraints[batchOffset + b];
							/*printf("-----------\n");
							printf("bb=%d\n",bb);
							printf("c.batchId = %d\n", c.m_batchId);
							*/
							b3Assert(c.m_batchId == bb);
							b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex];
							if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED)
							{
								int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex];
								int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex];

								for (int jj = 0; jj < numConstraintRows; jj++)
								{
									//
									b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj];
									//resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
									resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint);
								}
							}
						}
					}  //useGpu
					batchOffset += numConstraintsInBatch;
					constraintOffset += numConstraintsInBatch;
				}
			}  //for (int iteration...

			if (!useGpuSolveJointConstraintRows)
			{
				{
					B3_PROFILE("copy from host");
					m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool);
					m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
					m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool);
				}

				//B3_PROFILE("copy to host");
				//m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
			}
			//int sz = sizeof(b3GpuSolverBody);
			//printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz);
		}
		else
		{
			for (int iteration = 0; iteration < maxIterations; iteration++)
			{
				int numJoints = m_tmpSolverNonContactConstraintPool.size();
				for (int j = 0; j < numJoints; j++)
				{
					b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j];
					resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint);
				}

				if (!m_usePgs)
				{
					averageVelocities();
				}
			}
		}
	}
	clFinish(m_gpuData->m_queue);
	return 0.f;
}
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal)
{
	B3_PROFILE("GPU solveGroupCacheFriendlySetup");
	batchConstraints.resize(numConstraints);
	m_gpuData->m_gpuBatchConstraints->resize(numConstraints);
	m_staticIdx = -1;
	m_maxOverrideNumSolverIterations = 0;

	/*	m_gpuData->m_gpuBodies->resize(numBodies);
	m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies);

	b3OpenCLArray<b3InertiaData> gpuInertias(m_gpuData->m_context,m_gpuData->m_queue);
	gpuInertias.resize(numBodies);
	gpuInertias.copyFromHostPointer(inertias,numBodies);
	*/

	m_gpuData->m_gpuSolverBodies->resize(numBodies);

	m_tmpSolverBodyPool.resize(numBodies);
	{
		if (useGpuInitSolverBodies)
		{
			B3_PROFILE("m_initSolverBodiesKernel");

			b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initSolverBodiesKernel, "m_initSolverBodiesKernel");
			launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
			launcher.setBuffer(gpuBodies->getBufferCL());
			launcher.setConst(numBodies);
			launcher.launch1D(numBodies);
			clFinish(m_gpuData->m_queue);

			//			m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool);
		}
		else
		{
			gpuBodies->copyToHost(m_gpuData->m_cpuBodies);
			for (int i = 0; i < numBodies; i++)
			{
				b3RigidBodyData& body = m_gpuData->m_cpuBodies[i];
				b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i];
				initSolverBody(i, &solverBody, &body);
				solverBody.m_originalBodyIndex = i;
			}
			m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool);
		}
	}

	//	int totalBodies = 0;
	int totalNumRows = 0;
	//b3RigidBody* rb0=0,*rb1=0;
	//if (1)
	{
		{
			//			int i;

			m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);

			//			b3OpenCLArray<b3GpuGenericConstraint> gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue);

			if (useGpuInfo1)
			{
				B3_PROFILE("info1 and init batchConstraint");

				m_gpuData->m_gpuConstraintInfo1->resize(numConstraints);

				if (1)
				{
					B3_PROFILE("getInfo1Kernel");

					b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo1Kernel, "m_getInfo1Kernel");
					launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
					launcher.setBuffer(gpuConstraints->getBufferCL());
					launcher.setConst(numConstraints);
					launcher.launch1D(numConstraints);
					clFinish(m_gpuData->m_queue);
				}

				if (m_gpuData->m_batchSizes.size() == 0)
				{
					B3_PROFILE("initBatchConstraintsKernel");

					m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints);
					unsigned int total = 0;
					m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1, *m_gpuData->m_gpuConstraintRowOffsets, numConstraints, &total);
					unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints - 1);
					totalNumRows = total + lastElem;

					{
						B3_PROFILE("init batch constraints");
						b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initBatchConstraintsKernel, "m_initBatchConstraintsKernel");
						launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
						launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
						launcher.setBuffer(gpuConstraints->getBufferCL());
						launcher.setBuffer(gpuBodies->getBufferCL());
						launcher.setConst(numConstraints);
						launcher.launch1D(numConstraints);
						clFinish(m_gpuData->m_queue);
					}
					//assume the batching happens on CPU, so copy the data
					m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
				}
			}
			else
			{
				totalNumRows = 0;
				gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints);
				//calculate the total number of contraint rows
				for (int i = 0; i < numConstraints; i++)
				{
					unsigned int& info1 = m_tmpConstraintSizesPool[i];
					//					unsigned int info1;
					if (m_gpuData->m_cpuConstraints[i].isEnabled())
					{
						m_gpuData->m_cpuConstraints[i].getInfo1(&info1, &m_gpuData->m_cpuBodies[0]);
					}
					else
					{
						info1 = 0;
					}

					totalNumRows += info1;
				}

				m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
				m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool);
			}
			m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
			m_gpuData->m_gpuConstraintRows->resize(totalNumRows);

			//			b3GpuConstraintArray		verify;

			if (useGpuInfo2)
			{
				{
					B3_PROFILE("getInfo2Kernel");
					b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo2Kernel, "m_getInfo2Kernel");
					launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL());
					launcher.setBuffer(gpuConstraints->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL());
					launcher.setBuffer(gpuBodies->getBufferCL());
					launcher.setBuffer(gpuInertias->getBufferCL());
					launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL());
					launcher.setConst(infoGlobal.m_timeStep);
					launcher.setConst(infoGlobal.m_erp);
					launcher.setConst(infoGlobal.m_globalCfm);
					launcher.setConst(infoGlobal.m_damping);
					launcher.setConst(infoGlobal.m_numIterations);
					launcher.setConst(numConstraints);
					launcher.launch1D(numConstraints);
					clFinish(m_gpuData->m_queue);

					if (m_gpuData->m_batchSizes.size() == 0)
						m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);
					//m_gpuData->m_gpuConstraintRows->copyToHost(verify);
					//m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool);
				}
			}
			else
			{
				gpuInertias->copyToHost(m_gpuData->m_cpuInertias);

				///setup the b3SolverConstraints

				for (int i = 0; i < numConstraints; i++)
				{
					const int& info1 = m_tmpConstraintSizesPool[i];

					if (info1)
					{
						int constraintIndex = batchConstraints[i].m_originalConstraintIndex;
						int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[constraintIndex];

						b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset];
						b3GpuGenericConstraint& constraint = m_gpuData->m_cpuConstraints[i];

						b3RigidBodyData& rbA = m_gpuData->m_cpuBodies[constraint.getRigidBodyA()];
						//b3RigidBody& rbA = constraint.getRigidBodyA();
						//				b3RigidBody& rbB = constraint.getRigidBodyB();
						b3RigidBodyData& rbB = m_gpuData->m_cpuBodies[constraint.getRigidBodyB()];

						int solverBodyIdA = constraint.getRigidBodyA();  //getOrInitSolverBody(constraint.getRigidBodyA(),bodies,inertias);
						int solverBodyIdB = constraint.getRigidBodyB();  //getOrInitSolverBody(constraint.getRigidBodyB(),bodies,inertias);

						b3GpuSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
						b3GpuSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];

						if (rbA.m_invMass)
						{
							batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;
						}
						else
						{
							if (!solverBodyIdA)
								m_staticIdx = 0;
							batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;
						}

						if (rbB.m_invMass)
						{
							batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;
						}
						else
						{
							if (!solverBodyIdB)
								m_staticIdx = 0;
							batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;
						}

						int overrideNumSolverIterations = 0;  //constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
						if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations)
							m_maxOverrideNumSolverIterations = overrideNumSolverIterations;

						int j;
						for (j = 0; j < info1; j++)
						{
							memset(&currentConstraintRow[j], 0, sizeof(b3GpuSolverConstraint));
							currentConstraintRow[j].m_angularComponentA.setValue(0, 0, 0);
							currentConstraintRow[j].m_angularComponentB.setValue(0, 0, 0);
							currentConstraintRow[j].m_appliedImpulse = 0.f;
							currentConstraintRow[j].m_appliedPushImpulse = 0.f;
							currentConstraintRow[j].m_cfm = 0.f;
							currentConstraintRow[j].m_contactNormal.setValue(0, 0, 0);
							currentConstraintRow[j].m_friction = 0.f;
							currentConstraintRow[j].m_frictionIndex = 0;
							currentConstraintRow[j].m_jacDiagABInv = 0.f;
							currentConstraintRow[j].m_lowerLimit = 0.f;
							currentConstraintRow[j].m_upperLimit = 0.f;

							currentConstraintRow[j].m_originalContactPoint = 0;
							currentConstraintRow[j].m_overrideNumSolverIterations = 0;
							currentConstraintRow[j].m_relpos1CrossNormal.setValue(0, 0, 0);
							currentConstraintRow[j].m_relpos2CrossNormal.setValue(0, 0, 0);
							currentConstraintRow[j].m_rhs = 0.f;
							currentConstraintRow[j].m_rhsPenetration = 0.f;
							currentConstraintRow[j].m_solverBodyIdA = 0;
							currentConstraintRow[j].m_solverBodyIdB = 0;

							currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;
							currentConstraintRow[j].m_upperLimit = B3_INFINITY;
							currentConstraintRow[j].m_appliedImpulse = 0.f;
							currentConstraintRow[j].m_appliedPushImpulse = 0.f;
							currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
							currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
							currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
						}

						bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f);
						bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f);
						bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f);
						bodyAPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f);
						bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f);

						b3GpuConstraintInfo2 info2;
						info2.fps = 1.f / infoGlobal.m_timeStep;
						info2.erp = infoGlobal.m_erp;
						info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
						info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
						info2.m_J2linearAxis = 0;
						info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
						info2.rowskip = sizeof(b3GpuSolverConstraint) / sizeof(b3Scalar);  //check this
						///the size of b3GpuSolverConstraint needs be a multiple of b3Scalar
						b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3GpuSolverConstraint));
						info2.m_constraintError = &currentConstraintRow->m_rhs;
						currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
						info2.m_damping = infoGlobal.m_damping;
						info2.cfm = &currentConstraintRow->m_cfm;
						info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
						info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
						info2.m_numIterations = infoGlobal.m_numIterations;
						m_gpuData->m_cpuConstraints[i].getInfo2(&info2, &m_gpuData->m_cpuBodies[0]);

						///finalize the constraint setup
						for (j = 0; j < info1; j++)
						{
							b3GpuSolverConstraint& solverConstraint = currentConstraintRow[j];

							if (solverConstraint.m_upperLimit >= m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold())
							{
								solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold();
							}

							if (solverConstraint.m_lowerLimit <= -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold())
							{
								solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold();
							}

							//						solverConstraint.m_originalContactPoint = constraint;

							b3Matrix3x3& invInertiaWorldA = m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld;
							{
								//b3Vector3 angularFactorA(1,1,1);
								const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
								solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1;  //*angularFactorA;
							}

							b3Matrix3x3& invInertiaWorldB = m_gpuData->m_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld;
							{
								const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
								solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2;  //*constraint.getRigidBodyB().getAngularFactor();
							}

							{
								//it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal
								//because it gets multiplied iMJlB
								b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass;
								b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal;
								b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass;  //sign of normal?
								b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal;

								b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
								sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
								sum += iMJlB.dot(solverConstraint.m_contactNormal);
								sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
								b3Scalar fsum = b3Fabs(sum);
								b3Assert(fsum > B3_EPSILON);
								solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) / sum : 0.f;
							}

							///fix rhs
							///todo: add force/torque accelerators
							{
								b3Scalar rel_vel;
								b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel);
								b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel);

								rel_vel = vel1Dotn + vel2Dotn;

								b3Scalar restitution = 0.f;
								b3Scalar positionalError = solverConstraint.m_rhs;  //already filled in by getConstraintInfo2
								b3Scalar velocityError = restitution - rel_vel * info2.m_damping;
								b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv;
								b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv;
								solverConstraint.m_rhs = penetrationImpulse + velocityImpulse;
								solverConstraint.m_appliedImpulse = 0.f;
							}
						}
					}
				}

				m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool);
				m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool);

				if (m_gpuData->m_batchSizes.size() == 0)
					m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints);
				else
					m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints);

				m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool);

			}  //end useGpuInfo2
		}

#ifdef B3_SUPPORT_CONTACT_CONSTRAINTS
		{
			int i;

			for (i = 0; i < numManifolds; i++)
			{
				b3Contact4& manifold = manifoldPtr[i];
				convertContact(bodies, inertias, &manifold, infoGlobal);
			}
		}
#endif  //B3_SUPPORT_CONTACT_CONSTRAINTS
	}

	//	b3ContactSolverInfo info = infoGlobal;

	//	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
	//	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
	//	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();

	return 0.f;
}
Ejemplo n.º 24
0
int main(int argc, char* argv[])
{

	b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile);
	b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile);

	b3SetCustomPrintfFunc(myprintf);
	b3Vector3 test=b3MakeVector3(1,2,3);
	test.x = 1;
	test.y = 4;

	b3Printf("main start");

	b3CommandLineArgs args(argc,argv);


	if (args.CheckCmdLineFlag("help"))
	{
		Usage();
		return 0;
	}


	args.GetCmdLineArgument("selected_demo",selectedDemo);


	bool benchmark=args.CheckCmdLineFlag("benchmark");
	args.GetCmdLineArgument("max_framecount",maxFrameCount);



	dump_timings=args.CheckCmdLineFlag("dump_timings");
	
#ifndef B3_NO_PROFILE
	b3ProfileManager::Reset();
#endif //B3_NO_PROFILE


	window = new b3gDefaultOpenGLWindow();

	b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight);

	window->createWindow(wci);
	window->setResizeCallback(MyResizeCallback);
	window->setMouseMoveCallback(MyMouseMoveCallback);
	window->setMouseButtonCallback(MyMouseButtonCallback);
	window->setKeyboardCallback(MyKeyboardCallback);

	window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org");
	


#ifndef __APPLE__
	glewInit();
#endif

	gui = new GwenUserInterface();

    b3Printf("started GwenUserInterface\n");


	GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight);

	stash = initFont(&prim);


	if (gui)
	{
		gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale());

		b3Printf("init fonts\n");


		gui->setToggleButtonCallback(MyButtonCallback);

		gui->registerToggleButton(MYPAUSE,"Pause");
		gui->registerToggleButton(MYPROFILE,"Profile");
		gui->registerToggleButton(MYRESET,"Reset");

		int numItems = sizeof(allDemos)/sizeof(CpuDemo::CreateFunc*);
		demoNames.clear();
		for (int i=0;i<numItems;i++)
		{
			CpuDemo* demo = allDemos[i]();
			demoNames.push_back(demo->getName());
			delete demo;
		}

		gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0]);
		gui->setComboBoxCallback(MyComboBoxCallback);
	}



	do
	{
		bool syncOnly = false;
		gReset = false;




	static bool once=true;


	//glClearColor(0.3f, 0.3f, 0.3f, 1.0f);
	glClearColor(1,1,1,1);
	glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);

	window->setWheelCallback(b3DefaultWheelCallback);




	{
		CpuDemo* demo = allDemos[selectedDemo]();
		sDemo = demo;
//		demo->myinit();
		bool useGpu = false;

		
		int maxObjectCapacity=1024*1024;//128*1024;
		int maxShapeCapacityInBytes=10*1024*1024;

		//maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10);

		
		CpuDemo::ConstructionInfo ci;
		ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity,maxShapeCapacityInBytes);
		ci.m_window = window;
		ci.m_gui = gui;
		ci.m_instancingRenderer->init();
		ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
		ci.m_instancingRenderer->InitShaders();
		ci.m_primRenderer = &prim;
		

//		render.init();
		

		{
			demo->initPhysics(ci);
		}



		

		

		FILE* csvFile = 0;
		FILE* detailsFile = 0;

		if (benchmark)
		{
			gPause = false;
			char prefixFileName[1024];
			char csvFileName[1024];
			char detailsFileName[1024];

			
			
			//todo: move this time stuff into the Platform/Window class
#ifdef _WIN32
			SYSTEMTIME time;
			GetLocalTime(&time);
			char buf[1024];
			DWORD dwCompNameLen = 1024;
			if (0 != GetComputerName(buf, &dwCompNameLen))
			{
				printf("%s", buf);
			} else
			{
				printf("unknown", buf);
			}

			sprintf(prefixFileName,"%s_%s_%s_date_%d-%d-%d_time_%d-%d-%d","CPU",buf,demoNames[selectedDemo],time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond);
			
#else
			timeval now;
			gettimeofday(&now,0);
			
			struct tm* ptm;
			ptm = localtime (&now.tv_sec);
			char buf[1024];
#ifdef __APPLE__
			sprintf(buf,"MacOSX");
#else
			sprintf(buf,"Unix");
#endif
			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,
					ptm->tm_mday,
					ptm->tm_mon+1,
					ptm->tm_year+1900,
					ptm->tm_hour,
					ptm->tm_min,
					ptm->tm_sec);
			
#endif

			sprintf(csvFileName,"%s.csv",prefixFileName);
			sprintf(detailsFileName,"%s.txt",prefixFileName);
			printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName);

			//GetSystemTime(&time2);

			csvFile=fopen(csvFileName,"w");
			detailsFile = fopen(detailsFileName,"w");
			if (detailsFile)
				defaultOutput = detailsFile;

			//if (f)
			//	fprintf(f,"%s (%dx%dx%d=%d),\n",  g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ);
		}

		
		
		
		do
		{


			GLint err = glGetError();
			assert(err==GL_NO_ERROR);


			if (exportFrame || exportMovie)
			{
				
				if (!renderTexture)
				{
					renderTexture = new GLRenderToTexture();
					GLuint renderTextureId;
					glGenTextures(1, &renderTextureId);

					// "Bind" the newly created texture : all future texture functions will modify this texture
					glBindTexture(GL_TEXTURE_2D, renderTextureId);

					// Give an empty image to OpenGL ( the last "0" )
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0);
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);
					glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);

					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

					renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR);
				}
				
				bool result = renderTexture->enable();
			} 
			
			err = glGetError();
			assert(err==GL_NO_ERROR);

			b3ProfileManager::Reset();
			b3ProfileManager::Increment_Frame_Counter();

//			render.reshape(g_OpenGLWidth,g_OpenGLHeight);
			ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
			prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			window->startRendering();

			err = glGetError();
			assert(err==GL_NO_ERROR);

			glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
			glEnable(GL_DEPTH_TEST);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			if (!gPause)
			{
				B3_PROFILE("clientMoveAndDisplay");

				demo->clientMoveAndDisplay();
			}
			else
			{

			}

			{
				B3_PROFILE("renderScene");
				demo->renderScene();
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			/*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects())
			{
				B3_PROFILE("renderPhysicsWorld");
				b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray();
				b3CollisionObject** colObjArray = &arr[0];

				render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly);
				syncOnly = true;

			}
			*/

			
			if (exportFrame || exportMovie)
			{
				
				char fileName[1024];
				sprintf(fileName,"screenShot%d.png",frameIndex++);
				writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName);
				exportFrame = false;
				renderTexture->disable();
			}


			{
				B3_PROFILE("gui->draw");
				if (gui && gDrawGui)
					gui->draw(g_OpenGLWidth,g_OpenGLHeight);
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			{
				B3_PROFILE("window->endRendering");
				window->endRendering();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			{
				B3_PROFILE("glFinish");
			}

			

			if (dump_timings)
			{
				b3ProfileManager::dumpAll(stdout);
			}

			if (csvFile)
			{
				static int frameCount=0;

				if (frameCount>0)
				{
					DumpSimulationTime(csvFile);
					if (detailsFile)
					{
							fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount);
							b3ProfileManager::dumpAll(detailsFile);
					}
				}

				if (frameCount>=maxFrameCount)
					window->setRequestExit();
				frameCount++;
			}


			if (gStep)
				gPause=true;

		} while (!window->requestedExit() && !gReset);


		demo->exitPhysics();
		b3ProfileManager::CleanupMemory();
		delete ci.m_instancingRenderer;

		delete demo;
		sDemo = 0;

		if (detailsFile)
		{
			fclose(detailsFile);
			detailsFile=0;
		}
		if (csvFile)
		{
			fclose(csvFile);
			csvFile=0;
		}
	}



	} while (gReset);


	if (gui)
		gui->setComboBoxCallback(0);

	{

	

		delete gui;
		gui=0;

		exitFont();


		window->closeWindow();
		delete window;
		window = 0;

	}

	return 0;
}
Ejemplo n.º 25
0
///todo: add some acceleration structure (AABBs, tree etc)
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, 
		const struct b3GpuNarrowPhaseInternalData* narrowphaseData,	class b3GpuBroadphaseInterface* broadphase)
{
	//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);

	B3_PROFILE("castRaysGPU");
	
	{
		B3_PROFILE("raycast copyFromHost");
		m_data->m_gpuRays->copyFromHost(rays);
		m_data->m_gpuHitResults->copyFromHost(hitResults);
		
	}
	
	int numRays = hitResults.size();
	{
		m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
		m_data->m_numRayRigidPairsPerRay->resize(numRays);
		
		m_data->m_gpuNumRayRigidPairs->resize(1);
		m_data->m_gpuRayRigidPairs->resize(numRays * 16);
	}
	
	//run kernel
	const bool USE_BRUTE_FORCE_RAYCAST = false;
	if(USE_BRUTE_FORCE_RAYCAST)
	{
		B3_PROFILE("raycast launch1D");

		b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
		int numRays = rays.size();
		launcher.setConst(numRays);

		launcher.setBuffer(m_data->m_gpuRays->getBufferCL());
		launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL());

		launcher.setConst(numBodies);
		launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
		launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
		
		launcher.launch1D(numRays);
		clFinish(m_data->m_q);
	}
	else
	{
		m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() );

		m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
		
		int numRayRigidPairs = -1;
		m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
		if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() )
		{
			numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
			m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
		}
		
		m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs);	//Radix sort needs b3OpenCLArray::size() to be correct
		
		//Sort ray-rigid pairs by ray index
		{
			B3_PROFILE("sort ray-rigid pairs");
			m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) );
		}
		
		//detect start,count of each ray pair
		{
			B3_PROFILE("detect ray-rigid pair index ranges");
			
			{
				B3_PROFILE("reset ray-rigid pair index ranges");
				
				m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays);	//atomic_min used to find first index
				m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
				clFinish(m_data->m_q);
			}
			
			b3BufferInfoCL bufferInfo[] = 
			{
				b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ),
				
				b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
				b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() )
			};
			
			b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
			launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(numRayRigidPairs);
			
			launcher.launch1D(numRayRigidPairs);
			clFinish(m_data->m_q);
		}
		
		{
			B3_PROFILE("ray-rigid intersection");
			
			b3BufferInfoCL bufferInfo[] = 
			{
				b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ),
				b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ),
				b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
				b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ),
				
				b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ),
				b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ),
				
				b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() )
			};
			
			b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
			launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
			launcher.setConst(numRays);
			
			launcher.launch1D(numRays);
			clFinish(m_data->m_q);
		}
	}
	
	

	//copy results
	{
		B3_PROFILE("raycast copyToHost");
		m_data->m_gpuHitResults->copyToHost(hitResults);
	}

}
Ejemplo n.º 26
0
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays,	b3AlignedObjectArray<b3RayHit>& hitResults,
		int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{

//	return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);

	B3_PROFILE("castRaysHost");
	for (int r=0;r<rays.size();r++)
	{
		b3Vector3 rayFrom = rays[r].m_from;
		b3Vector3 rayTo = rays[r].m_to;
		float hitFraction = hitResults[r].m_hitFraction;

		int hitBodyIndex= -1;
		b3Vector3 hitNormal;

		for (int b=0;b<numBodies;b++)
		{
				
			const b3Vector3& pos = bodies[b].m_pos;
			const b3Quaternion& orn = bodies[b].m_quat;
			
			switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
			{
			case SHAPE_SPHERE:
				{
					b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
					if (sphere_intersect(pos,  radius, rayFrom, rayTo,hitFraction))
					{
						hitBodyIndex = b;
						b3Vector3 hitPoint;
						hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
						hitNormal = (hitPoint-bodies[b].m_pos).normalize();
					}
				}
			case SHAPE_CONVEX_HULL:
				{

					b3Transform convexWorldTransform;
					convexWorldTransform.setIdentity();
					convexWorldTransform.setOrigin(bodies[b].m_pos);
					convexWorldTransform.setRotation(bodies[b].m_quat);
					b3Transform convexWorld2Local = convexWorldTransform.inverse();

					b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
					b3Vector3 rayToLocal = convexWorld2Local(rayTo);
					
					
					int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
					const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
					if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal))
					{
						hitBodyIndex = b;
					}

					
					break;
				}
			default:
				{
					static bool once=true;
					if (once)
					{
						once=false;
						b3Warning("Raytest: unsupported shape type\n");
					}
				}
			}
		}
		if (hitBodyIndex>=0)
		{

			hitResults[r].m_hitFraction = hitFraction;
			hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
			hitResults[r].m_hitNormal = hitNormal;
			hitResults[r].m_hitBody = hitBodyIndex;
		}

	}
}
Ejemplo n.º 27
0
	bool checkData()
	{
		bool hasStatus = false;

		int serviceResult = enet_host_service(m_client, &m_event, 0);

		if (serviceResult > 0)
		{
			switch (m_event.type)
			{
			case ENET_EVENT_TYPE_CONNECT:
				printf("A new client connected from %x:%u.\n",
					m_event.peer->address.host,
					m_event.peer->address.port);

				m_event.peer->data = (void*)"New User";
				break;

			case ENET_EVENT_TYPE_RECEIVE:
			{
				if (gVerboseNetworkMessagesClient)
				{
					printf("A packet of length %lu containing '%s' was "
						"received from %s on channel %u.\n",
						m_event.packet->dataLength,
						(char*)m_event.packet->data,
						(char*)m_event.peer->data,
						m_event.channelID);
				}

				int packetSizeInBytes = b3DeserializeInt(m_event.packet->data);

				if (packetSizeInBytes == m_event.packet->dataLength)
				{

					SharedMemoryStatus* statPtr = (SharedMemoryStatus*)&m_event.packet->data[4];
					if (statPtr->m_type == CMD_STEP_FORWARD_SIMULATION_COMPLETED)
					{
						SharedMemoryStatus dummy;
						dummy.m_type = CMD_STEP_FORWARD_SIMULATION_COMPLETED;
						m_lastStatus = dummy;
						m_stream.resize(0);
					}
					else
					{

						m_lastStatus = *statPtr;
						int streamOffsetInBytes = 4 + sizeof(SharedMemoryStatus);
						int numStreamBytes = packetSizeInBytes - streamOffsetInBytes;
						m_stream.resize(numStreamBytes);
						for (int i = 0; i < numStreamBytes; i++)
						{
							m_stream[i] = m_event.packet->data[i + streamOffsetInBytes];
						}
					}
				}
				else
				{
					printf("unknown status message received\n");
				}
				enet_packet_destroy(m_event.packet);
				hasStatus = true;
				break;
			}
			case ENET_EVENT_TYPE_DISCONNECT:
			{
				printf("%s disconnected.\n", (char*)m_event.peer->data);

				break;
			}
			default:
				{
					printf("unknown event type: %d.\n", m_event.type);
				}
			}
		}
		else if (serviceResult > 0)
		{
			puts("Error with servicing the client");
		}

		return hasStatus;
	}
Ejemplo n.º 28
0
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index)
{
	B3_PROFILE("solveContacts");
	m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
	m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
	m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts);

	if (optionalSortContactsDeterminism)
	{
		if (!gCpuSortContactsDeterminism)
		{
			B3_PROFILE("GPU Sort contact constraints (determinism)");

			m_data->m_pBufContactOutGPUCopy->resize(numContacts);
			m_data->m_contactKeyValues->resize(numContacts);

			m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);

			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
						
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}

			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);

			{
				B3_PROFILE("gpu reorderContactKernel (determinism)");
                                
				b3Int4 cdata;
				cdata.x = numContacts;
                                
				//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
				//	, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
				b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst( cdata );
				launcher.launch1D( numContacts, 64 );
            }

		} else
		{
			B3_PROFILE("CPU Sort contact constraints (determinism)");
			b3AlignedObjectArray<b3Contact4> cpuConstraints;
			m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints);
			bool sort = true;
			if (sort)
			{
				cpuConstraints.quickSort(b3ContactCmp);

				for (int i=0;i<cpuConstraints.size();i++)
				{
					cpuConstraints[i].m_batchIdx = i;
				}
			}
			m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints);
			if (m_debugOutput==100)
			{
				for (int i=0;i<cpuConstraints.size();i++)
				{
					printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx);
				}
			}

			m_debugOutput++;
		}
	}
	



	int nContactOut = m_data->m_pBufContactOutGPU->size();

	bool useSolver = true;
	

    if (useSolver)
    {
        float dt=1./60.;
        b3ConstraintCfg csCfg( dt );
        csCfg.m_enableParallelSolve = true;
        csCfg.m_batchCellSize = 6;
        csCfg.m_staticIdx = static0Index;
        
        
        b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU;

        void* additionalData = 0;//m_data->m_frictionCGPU;
        const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU;
        b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
        int nContacts = nContactOut;
        
        
		int maxNumBatches = 0;
 
		if (!gUseLargeBatches)
        {
            
            if( m_data->m_solverGPU->m_contactBuffer2)
            {
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
            
            if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
            {
				m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
			
            //clFinish(m_data->m_queue);
            
            
            
			{
				B3_PROFILE("batching");
				//@todo: just reserve it, without copy of original contact (unless we use warmstarting)



				const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf;


				{

					//b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
					//b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );

					const int sortAlignment = 512; // todo. get this out of sort
					if( csCfg.m_enableParallelSolve )
					{


						int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment );

						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;


						if (!gCpuSetSortData)
						{	//	2. set cell idx
							B3_PROFILE("GPU set cell idx");
							struct CB
							{
								int m_nContacts;
								int m_staticIdx;
								float m_scale;
								b3Int4 m_nSplit;
							};

							b3Assert( sortSize%64 == 0 );
							CB cdata;
							cdata.m_nContacts = nContacts;
							cdata.m_staticIdx = csCfg.m_staticIdx;
							cdata.m_scale = 1.f/csCfg.m_batchCellSize;
							cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X;
							cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y;
							cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z;

							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);


							b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
							b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" );
							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
							launcher.setConst( cdata.m_nContacts );
							launcher.setConst( cdata.m_scale );
							launcher.setConst(cdata.m_nSplit);
							launcher.setConst(cdata.m_staticIdx);


							launcher.launch1D( sortSize, 64 );
						} else
						{
							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
							b3AlignedObjectArray<b3SortData> sortDataCPU;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU);

							b3AlignedObjectArray<b3Contact4> contactCPU;
							m_data->m_pBufContactOutGPU->copyToHost(contactCPU);
							b3AlignedObjectArray<b3RigidBodyData> bodiesCPU;
							bodyBuf->copyToHost(bodiesCPU);
							float scale = 1.f/csCfg.m_batchCellSize;
							b3Int4 nSplit;
							nSplit.x = B3_SOLVER_N_SPLIT_X;
							nSplit.y = B3_SOLVER_N_SPLIT_Y;
							nSplit.z = B3_SOLVER_N_SPLIT_Z;

							SetSortDataCPU(&contactCPU[0],  &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx);


							m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU);
						}



						if (!gCpuRadixSort)
						{	//	3. sort by cell idx
							B3_PROFILE("gpuRadixSort");
							//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
							//int sortBit = 32;
							//if( n <= 0xffff ) sortBit = 16;
							//if( n <= 0xff ) sortBit = 8;
							//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
							//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut);



						} else
						{
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							b3AlignedObjectArray<b3SortData> hostValues;
							keyValuesInOut.copyToHost(hostValues);
							hostValues.quickSort(sortfnc);
							keyValuesInOut.copyFromHost(hostValues);
						}


						if (gUseScanHost)
						{
							//	4. find entries
							B3_PROFILE("cpuBoundSearch");
							b3AlignedObjectArray<unsigned int> countsHost;
							countsNative->copyToHost(countsHost);

							b3AlignedObjectArray<b3SortData> sortDataHost;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);


							//m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);

							countsNative->copyFromHost(countsHost);


							//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
							//	B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );

							//unsigned int sum;
							//m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
							b3AlignedObjectArray<unsigned int> offsetsHost;
							offsetsHost.resize(offsetsNative->size());


							m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum );
							offsetsNative->copyFromHost(offsetsHost);

							//printf("sum = %d\n",sum);
						}  else
						{
							//	4. find entries
							B3_PROFILE("gpuBoundSearch");
							m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
						} 




						if (nContacts)
						{	//	5. sort constraints by cellIdx
							if (gReorderContactsOnCpu)
							{
								B3_PROFILE("cpu m_reorderContactKernel");
								b3AlignedObjectArray<b3SortData> sortDataHost;
								m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);
								b3AlignedObjectArray<b3Contact4> inContacts;
								b3AlignedObjectArray<b3Contact4> outContacts;
								m_data->m_pBufContactOutGPU->copyToHost(inContacts);
								outContacts.resize(inContacts.size());
								for (int i=0;i<nContacts;i++)
								{
									int srcIdx = sortDataHost[i].y;
									outContacts[i] = inContacts[srcIdx];
								}
								m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts);

								/*								"void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
								"{\n"
								"	int nContacts = cb.x;\n"
								"	int gIdx = GET_GLOBAL_IDX;\n"
								"	if( gIdx < nContacts )\n"
								"	{\n"
								"		int srcIdx = sortData[gIdx].y;\n"
								"		out[gIdx] = in[srcIdx];\n"
								"	}\n"
								"}\n"
								*/
							} else
							{
								B3_PROFILE("gpu m_reorderContactKernel");

								b3Int4 cdata;
								cdata.x = nContacts;

								b3BufferInfoCL bInfo[] = { 
									b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), 
									b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
									, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };

									b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
									launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
									launcher.setConst( cdata );
									launcher.launch1D( nContacts, 64 );
							}
						}




					}

				}

				//clFinish(m_data->m_queue);

				//				{
				//				b3AlignedObjectArray<unsigned int> histogram;
				//				m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
				//				printf(",,,\n");
				//				}


				if (nContacts)
				{

					if (gUseCpuCopyConstraints)
					{
						for (int i=0;i<nContacts;i++)
						{
							m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2);
							//							m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); 
							//						m_data->m_pBufContactOutGPU->getBufferCL() 
						}

					} else
					{
						B3_PROFILE("gpu m_copyConstraintKernel");
						b3Int4 cdata; cdata.x = nContacts;
						b3BufferInfoCL bInfo[] = { 
							b3BufferInfoCL(  m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), 
							b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) 
						};

						b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" );
						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
						launcher.setConst(  cdata );
						launcher.launch1D( nContacts, 64 );
						//we use the clFinish for proper benchmark/profile
						clFinish(m_data->m_queue);
					}
				}


				bool compareGPU = false;
				if (nContacts)
				{
					if (!gCpuBatchContacts)
					{
						B3_PROFILE("gpu batchContacts");
						maxNumBatches = 150;//250;
						m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
						clFinish(m_data->m_queue);
					} else
					{
						B3_PROFILE("cpu batchContacts");
						static b3AlignedObjectArray<b3Contact4> cpuContacts;
						b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
						{
							B3_PROFILE("copyToHost");
							contactsIn->copyToHost(cpuContacts);
						}
						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;

						b3AlignedObjectArray<unsigned int> nNativeHost;
						b3AlignedObjectArray<unsigned int> offsetsNativeHost;

						{
							B3_PROFILE("countsNative/offsetsNative copyToHost");
							countsNative->copyToHost(nNativeHost);
							offsetsNative->copyToHost(offsetsNativeHost);
						}


						int numNonzeroGrid=0;

						if (gUseLargeBatches)
						{
							m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
							int totalNumConstraints = cpuContacts.size();
							int simdWidth =numBodies+1;//-1;//64;//-1;//32;
							int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
							maxNumBatches = b3Max(numBatches,maxNumBatches);
							static int globalMaxBatch = 0;
							if (maxNumBatches>globalMaxBatch )
							{
								globalMaxBatch  = maxNumBatches;
								b3Printf("maxNumBatches = %d\n",maxNumBatches);
							}
								
						} else
						{
							m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES);
							B3_PROFILE("cpu batch grid");
							for(int i=0; i<B3_SOLVER_N_CELLS; i++)
							{
								int n = (nNativeHost)[i];
								int offset = (offsetsNativeHost)[i];
								if( n )
								{
									numNonzeroGrid++;
									int simdWidth =numBodies+1;//-1;//64;//-1;//32;
									int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]);	//	on GPU
									maxNumBatches = b3Max(numBatches,maxNumBatches);
									static int globalMaxBatch = 0;
									if (maxNumBatches>globalMaxBatch )
									{
										globalMaxBatch  = maxNumBatches;
										b3Printf("maxNumBatches = %d\n",maxNumBatches);
									}
									//we use the clFinish for proper benchmark/profile
									
								}
							}
							//clFinish(m_data->m_queue);
						}
						{
							B3_PROFILE("m_contactBuffer->copyFromHost");
							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
						}

					} 

				}


			


			} 


		} 


			//printf("maxNumBatches = %d\n", maxNumBatches);

		if (gUseLargeBatches)
		{
			if (nContacts)
			{
				B3_PROFILE("cpu batchContacts");
				static b3AlignedObjectArray<b3Contact4> cpuContacts;
//				b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
				{
					B3_PROFILE("copyToHost");
					m_data->m_pBufContactOutGPU->copyToHost(cpuContacts);
				}
				b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
				b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;



				int numNonzeroGrid=0;

				{
					m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
					int totalNumConstraints = cpuContacts.size();
					int simdWidth =numBodies+1;//-1;//64;//-1;//32;
					int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
					maxNumBatches = b3Max(numBatches,maxNumBatches);
					static int globalMaxBatch = 0;
					if (maxNumBatches>globalMaxBatch )
					{
						globalMaxBatch  = maxNumBatches;
						b3Printf("maxNumBatches = %d\n",maxNumBatches);
					}
								
				}
				{
					B3_PROFILE("m_contactBuffer->copyFromHost");
					m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
				}

			} 

		}

		if (nContacts)
		{
			B3_PROFILE("gpu convertToConstraints");
			m_data->m_solverGPU->convertToConstraints( bodyBuf, 
				shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
				contactConstraintOut, 
				additionalData, nContacts, 
				(b3SolverBase::ConstraintCfg&) csCfg );
			clFinish(m_data->m_queue);
		}


		if (1)
		{
			int numIter = 4;

			m_data->m_solverGPU->m_nIterations = numIter;//10
			if (!gCpuSolveConstraint)
			{
				B3_PROFILE("GPU solveContactConstraint");

				/*m_data->m_solverGPU->solveContactConstraint(
				m_data->m_bodyBufferGPU, 
				m_data->m_inertiaBufferGPU,
				m_data->m_contactCGPU,0,
				nContactOut ,
				maxNumBatches);
				*/

				//m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes);

				if (gUseLargeBatches)
				{
					solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);
				} else
				{
					solveContactConstraint(
						m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu);
				}
			}
			else
			{
				B3_PROFILE("Host solveContactConstraint");

				m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes);
			}
            
            
        }
        
        
#if 0
        if (0)
        {
            B3_PROFILE("read body velocities back to CPU");
            //read body updated linear/angular velocities back to CPU
            m_data->m_bodyBufferGPU->read(
                                                  m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
            adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL );
        }
#endif
        
    }

}
Ejemplo n.º 29
0
inline int b3GpuPgsContactSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
{
	
	B3_PROFILE("sortConstraintByBatch2");
	

	
	bodyUsed2.resize(2*simdWidth);

	for (int q=0;q<2*simdWidth;q++)
		bodyUsed2[q]=0;

	int curBodyUsed = 0;

	int numIter = 0;
    
	m_data->m_sortData.resize(numConstraints);
	m_data->m_idxBuffer.resize(numConstraints);
	m_data->m_old.resize(numConstraints);
	
	unsigned int* idxSrc = &m_data->m_idxBuffer[0];
		
#if defined(_DEBUG)
	for(int i=0; i<numConstraints; i++)
		cs[i].getBatchIdx() = -1;
#endif
	for(int i=0; i<numConstraints; i++) 
		idxSrc[i] = i;
    
	int numValidConstraints = 0;
	int unprocessedConstraintIndex = 0;

	int batchIdx = 0;
    

	{
		B3_PROFILE("cpu batch innerloop");
		
		while( numValidConstraints < numConstraints)
		{
			numIter++;
			int nCurrentBatch = 0;
			//	clear flag
			for(int i=0; i<curBodyUsed; i++) 
				bodyUsed2[i] = 0;
            curBodyUsed = 0;

			for(int i=numValidConstraints; i<numConstraints; i++)
			{
				int idx = idxSrc[i];
				b3Assert( idx < numConstraints );
				//	check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;
				int aUnavailable = 0;
				int bUnavailable = 0;
				if (!aIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyA == bodyUsed2[j])
						{
							aUnavailable=1;
							break;
						}
					}
				}
				if (!aUnavailable)
				if (!bIsStatic)
				{
					for (int j=0;j<curBodyUsed;j++)
					{
						if (bodyB == bodyUsed2[j])
						{
							bUnavailable=1;
							break;
						}
					}
				}
                
				if( aUnavailable==0 && bUnavailable==0 ) // ok
				{
					if (!aIsStatic)
					{
						bodyUsed2[curBodyUsed++] = bodyA;
					}
					if (!bIsStatic)
					{
						bodyUsed2[curBodyUsed++] = bodyB;
					}

					cs[idx].getBatchIdx() = batchIdx;
					m_data->m_sortData[idx].m_key = batchIdx;
					m_data->m_sortData[idx].m_value = idx;

					if (i!=numValidConstraints)
					{
						b3Swap(idxSrc[i], idxSrc[numValidConstraints]);
					}

					numValidConstraints++;
					{
						nCurrentBatch++;
						if( nCurrentBatch == simdWidth )
						{
							nCurrentBatch = 0;
							for(int i=0; i<curBodyUsed; i++) 
								bodyUsed2[i] = 0;

							
							curBodyUsed = 0;
						}
					}
				}
			}
			
			batchIdx ++;
		}
	}
	{
		B3_PROFILE("quickSort");
		//m_data->m_sortData.quickSort(sortfnc);
	}

	{
        B3_PROFILE("reorder");
		//	reorder
		
		memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints);

		for(int i=0; i<numConstraints; i++)
		{
			b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]);
			int idx = m_data->m_sortData[idxSrc[i]].m_value;
			cs[i] = m_data->m_old[idx];
		}
	}
	
#if defined(_DEBUG)
    //		debugPrintf( "nBatches: %d\n", batchIdx );
	for(int i=0; i<numConstraints; i++)
    {
        b3Assert( cs[i].getBatchIdx() != -1 );
    }
#endif

	
	return batchIdx;
}
Ejemplo n.º 30
0
inline int b3GpuPgsContactSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
{
	
	B3_PROFILE("sortConstraintByBatch");
	int numIter = 0;
    
	sortData.resize(n);
	idxBuffer.resize(n);
	old.resize(n);
	
	unsigned int* idxSrc = &idxBuffer[0];
	unsigned int* idxDst = &idxBuffer[0];
	int nIdxSrc, nIdxDst;
    
	const int N_FLG = 256;
	const int FLG_MASK = N_FLG-1;
	unsigned int flg[N_FLG/32];
#if defined(_DEBUG)
	for(int i=0; i<n; i++)
		cs[i].getBatchIdx() = -1;
#endif
	for(int i=0; i<n; i++) 
		idxSrc[i] = i;
	nIdxSrc = n;
    
	int batchIdx = 0;
    
	{
		B3_PROFILE("cpu batch innerloop");
		while( nIdxSrc )
		{
			numIter++;
			nIdxDst = 0;
			int nCurrentBatch = 0;
            
			//	clear flag
			for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
            
			for(int i=0; i<nIdxSrc; i++)
			{
				int idx = idxSrc[i];
				

				b3Assert( idx < n );
				//	check if it can go
				int bodyAS = cs[idx].m_bodyAPtrAndSignBit;
				int bodyBS = cs[idx].m_bodyBPtrAndSignBit;
                
				
                
				int bodyA = abs(bodyAS);
				int bodyB = abs(bodyBS);
                
				int aIdx = bodyA & FLG_MASK;
				int bIdx = bodyB & FLG_MASK;
                
				unsigned int aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31));
				unsigned int bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31));
                
				bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;
				bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;

                //use inv_mass!
				aUnavailable = !aIsStatic? aUnavailable:0;//
				bUnavailable = !bIsStatic? bUnavailable:0;
                
				if( aUnavailable==0 && bUnavailable==0 ) // ok
				{
					if (!aIsStatic)
						flg[ aIdx/32 ] |= (1<<(aIdx&31));
					if (!bIsStatic)
						flg[ bIdx/32 ] |= (1<<(bIdx&31));

					cs[idx].getBatchIdx() = batchIdx;
					sortData[idx].m_key = batchIdx;
					sortData[idx].m_value = idx;
                    
					{
						nCurrentBatch++;
						if( nCurrentBatch == simdWidth )
						{
							nCurrentBatch = 0;
							for(int i=0; i<N_FLG/32; i++) flg[i] = 0;
						}
					}
				}
				else
				{
					idxDst[nIdxDst++] = idx;
				}
			}
			b3Swap( idxSrc, idxDst );
			b3Swap( nIdxSrc, nIdxDst );
			batchIdx ++;
		}
	}
	{
		B3_PROFILE("quickSort");
		sortData.quickSort(sortfnc);
	}
	
	
	{
        B3_PROFILE("reorder");
		//	reorder
		
		memcpy( &old[0], cs, sizeof(b3Contact4)*n);
		for(int i=0; i<n; i++)
		{
			int idx = sortData[i].m_value;
			cs[i] = old[idx];
		}
	}
    
	
#if defined(_DEBUG)
    //		debugPrintf( "nBatches: %d\n", batchIdx );
	for(int i=0; i<n; i++)
    {
        b3Assert( cs[i].getBatchIdx() != -1 );
    }
#endif
	return batchIdx;
}