Example #1
0
    virtual bool fragment(Vec3f bar, TGAColor &color) {
		//B3_PROFILE("fragment");
        Vec4f p = m_viewportMat*(varying_tri_light_view*bar);
        float depth = p[2];
        p = p/p[3];
        
		float index_x = b3Max(float(0.0), b3Min(float(m_width-1), p[0]));
		float index_y = b3Max(float(0.0), b3Min(float(m_height-1), p[1]));
		int idx = int(index_x) + int(index_y)*m_width; // index in the shadowbuffer array
        float shadow = 0.8+0.2*(m_shadowBuffer->at(idx)<-depth+0.05); // magic coeff to avoid z-fighting
        
        Vec3f bn = (varying_nrm*bar).normalize();
        Vec2f uv = varying_uv*bar;
        
        Vec3f reflection_direction = (bn * (bn * m_light_dir_local * 2.f) - m_light_dir_local).normalize();
        float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv));
        float diffuse = b3Max(0.f, bn * m_light_dir_local);
        
        color = m_model->diffuse(uv);
        color[0] *= m_colorRGBA[0];
        color[1] *= m_colorRGBA[1];
        color[2] *= m_colorRGBA[2];
        color[3] *= m_colorRGBA[3];
        
        for (int i = 0; i < 3; ++i)
        {
            color[i] = b3Min(int(m_ambient_coefficient*color[i] + shadow*(m_diffuse_coefficient*diffuse+m_specular_coefficient*specular)*color[i]*m_light_color[i]), 255);
        }
        
        return false;

    }
Example #2
0
    virtual bool fragment(Vec3f bar, TGAColor &color) {
        Vec3f bn = (varying_nrm*bar).normalize();
        Vec2f uv = varying_uv*bar;

        Vec3f reflection_direction = (bn * (bn * m_light_dir_local * 2.f) - m_light_dir_local).normalize();
        float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv));
        float diffuse = b3Max(0.f, bn * m_light_dir_local);

	float ambient_coefficient = 0.6;
        float diffuse_coefficient = 0.35;
        float specular_coefficient = 0.05;

        float intensity = ambient_coefficient + b3Min(diffuse * diffuse_coefficient + specular * specular_coefficient, 1.0f - ambient_coefficient);

        color = m_model->diffuse(uv) * intensity;

        //warning: bgra color is swapped to rgba to upload texture
        color.bgra[0] *= m_colorRGBA[0];
        color.bgra[1] *= m_colorRGBA[1];
        color.bgra[2] *= m_colorRGBA[2];
        color.bgra[3] *= m_colorRGBA[3];
        
        color.bgra[0] *= m_light_color[0];
        color.bgra[1] *= m_light_color[1];
        color.bgra[2] *= m_light_color[2];

        return false;
    }
Example #3
0
bool b3BroadPhase::Report(u32 proxyId) 
{
	if (proxyId == m_queryProxyId) 
	{
		// The proxy can't overlap with itself.
		return true;
	}

	// Check capacity.
	if (m_pairCount == m_pairCapacity) 
	{
		// Duplicate capacity.
		m_pairCapacity *= 2;
		
		b3Pair* oldPairs = m_pairs;
		m_pairs = (b3Pair*)b3Alloc(m_pairCapacity * sizeof(b3Pair));
		memcpy(m_pairs, oldPairs, m_pairCount * sizeof(b3Pair));
		b3Free(oldPairs);
	}

	// Add overlapping pair to the pair buffer.
	m_pairs[m_pairCount].proxy1 = b3Min(proxyId, m_queryProxyId);
	m_pairs[m_pairCount].proxy2 = b3Max(proxyId, m_queryProxyId);
	++m_pairCount;

	// Keep looking for overlapping pairs.
	return true;
}
Example #4
0
bool b3BroadPhase::QueryCallback(i32 proxyId) {
	if (proxyId == m_queryProxyId) {
		// The proxy can't overlap with itself.
		return true;
	}

	// Check capacity.
	if (m_pairBufferCount == m_pairBufferCapacity) {
		// Duplicate capacity.
		m_pairBufferCapacity *= 2;
		
		b3Pair* oldPairBuffer = m_pairBuffer;
		m_pairBuffer = (b3Pair*)::b3Alloc(m_pairBufferCapacity * sizeof(b3Pair));
		::memcpy(m_pairBuffer, oldPairBuffer, m_pairBufferCount * sizeof(b3Pair));
		::b3Free(oldPairBuffer);
	}

	// Add overlapping pair to the pair buffer.
	m_pairBuffer[m_pairBufferCount].proxy1 = b3Min(proxyId, m_queryProxyId);
	m_pairBuffer[m_pairBufferCount].proxy2 = b3Max(proxyId, m_queryProxyId);
	++m_pairBufferCount;

	// Keep looking for overlapping pairs.
	return true;
}
Example #5
0
    virtual bool fragment(Vec3f bar, TGAColor &color) {
        Vec3f bn = (varying_nrm*bar).normalize();
        Vec2f uv = varying_uv*bar;

        mat<3,3,float> A;
        A[0] = ndc_tri.col(1) - ndc_tri.col(0);
        A[1] = ndc_tri.col(2) - ndc_tri.col(0);
        A[2] = bn;

        mat<3,3,float> AI = A.invert();

        Vec3f i = AI * Vec3f(varying_uv[0][1] - varying_uv[0][0], varying_uv[0][2] - varying_uv[0][0], 0);
        Vec3f j = AI * Vec3f(varying_uv[1][1] - varying_uv[1][0], varying_uv[1][2] - varying_uv[1][0], 0);

        mat<3,3,float> B;
        B.set_col(0, i.normalize());
        B.set_col(1, j.normalize());
        B.set_col(2, bn);

        Vec3f n = (B*m_model->normal(uv)).normalize();

        float diff = b3Min(b3Max(0.f, n*m_light_dir_local+0.3f),1.f);
		//float diff = b3Max(0.f, n*m_light_dir_local);
        color = m_model->diffuse(uv)*diff;

        return false;
    }
Example #6
0
static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3)
{
	// It calculates possible 3 area constructed from random 4 points and returns the biggest one.

	b3Vector3 a[3],b[3];
	a[0] = p0 - p1;
	a[1] = p0 - p2;
	a[2] = p0 - p3;
	b[0] = p2 - p3;
	b[1] = p1 - p3;
	b[2] = p1 - p2;

	//todo: Following 3 cross production can be easily optimized by SIMD.
	b3Vector3 tmp0 = a[0].cross(b[0]);
	b3Vector3 tmp1 = a[1].cross(b[1]);
	b3Vector3 tmp2 = a[2].cross(b[2]);

	return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2());
}
static inline void b3SolveContact(b3ContactConstraint4& cs, 
	const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
	const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, 
	float maxRambdaDt[4], float minRambdaDt[4])
{

	b3Vector3 dLinVelA; dLinVelA.setZero();
	b3Vector3 dAngVelA; dAngVelA.setZero();
	b3Vector3 dLinVelB; dLinVelB.setZero();
	b3Vector3 dAngVelB; dAngVelB.setZero();

	for(int ic=0; ic<4; ic++)
	{
		//	dont necessary because this makes change to 0
		if( cs.m_jacCoeffInv[ic] == 0.f ) continue;

		{
			b3Vector3 angular0, angular1, linear;
			b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
			b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
			b3SetLinearAndAngular( (const b3Vector3 &)-cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 );

			float rambdaDt = b3CalcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1,
				linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic];
			rambdaDt *= cs.m_jacCoeffInv[ic];

			{
				float prevSum = cs.m_appliedRambdaDt[ic];
				float updated = prevSum;
				updated += rambdaDt;
				updated = b3Max( updated, minRambdaDt[ic] );
				updated = b3Min( updated, maxRambdaDt[ic] );
				rambdaDt = updated - prevSum;
				cs.m_appliedRambdaDt[ic] = updated;
			}

			b3Vector3 linImp0 = invMassA*linear*rambdaDt;
			b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
			b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
			b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
#ifdef _WIN32
            b3Assert(_finite(linImp0.getX()));
			b3Assert(_finite(linImp1.getX()));
#endif
			{
				linVelA += linImp0;
				angVelA += angImp0;
				linVelB += linImp1;
				angVelB += angImp1;
			}
		}
	}


}
void solveContact3(b3GpuConstraint4* cs,
			b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
			b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
			b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB)
{
	float minRambdaDt = 0;
	float maxRambdaDt = FLT_MAX;

	for(int ic=0; ic<4; ic++)
	{
		if( cs->m_jacCoeffInv[ic] == 0.f ) continue;

		b3Vector3 angular0, angular1, linear;
		b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr;
		b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr;
		setLinearAndAngular( cs->m_linear, r0, r1, linear, angular0, angular1 );

		float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, 
			*linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];
		rambdaDt *= cs->m_jacCoeffInv[ic];

		{
			float prevSum = cs->m_appliedRambdaDt[ic];
			float updated = prevSum;
			updated += rambdaDt;
			updated = b3Max( updated, minRambdaDt );
			updated = b3Min( updated, maxRambdaDt );
			rambdaDt = updated - prevSum;
			cs->m_appliedRambdaDt[ic] = updated;
		}

		b3Vector3 linImp0 = invMassA*linear*rambdaDt;
		b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
		b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
		b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;

		if (invMassA)
		{
			*dLinVelA += linImp0;
			*dAngVelA += angImp0;
		}
		if (invMassB)
		{
			*dLinVelB += linImp1;
			*dAngVelB += angImp1;
		}
	}
}
Example #9
0
static
__inline
void solveFriction(b3GpuConstraint4& cs,
                   const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
                   const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
                   float maxRambdaDt[4], float minRambdaDt[4])
{

    if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return;
    const b3Vector3& center = (const b3Vector3&)cs.m_center;

    b3Vector3 n = -(const b3Vector3&)cs.m_linear;

    b3Vector3 tangent[2];
#if 1
    b3PlaneSpace1 (n, tangent[0],tangent[1]);
#else
    b3Vector3 r = cs.m_worldPos[0]-center;
    tangent[0] = cross3( n, r );
    tangent[1] = cross3( tangent[0], n );
    tangent[0] = normalize3( tangent[0] );
    tangent[1] = normalize3( tangent[1] );
#endif

    b3Vector3 angular0, angular1, linear;
    b3Vector3 r0 = center - posA;
    b3Vector3 r1 = center - posB;
    for(int i=0; i<2; i++)
    {
        setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 );
        float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
                                    linVelA, angVelA, linVelB, angVelB );
        rambdaDt *= cs.m_fJacCoeffInv[i];

        {
            float prevSum = cs.m_fAppliedRambdaDt[i];
            float updated = prevSum;
            updated += rambdaDt;
            updated = b3Max( updated, minRambdaDt[i] );
            updated = b3Min( updated, maxRambdaDt[i] );
            rambdaDt = updated - prevSum;
            cs.m_fAppliedRambdaDt[i] = updated;
        }

        b3Vector3 linImp0 = invMassA*linear*rambdaDt;
        b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
        b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
        b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
#ifdef _WIN32
        b3Assert(_finite(linImp0.getX()));
        b3Assert(_finite(linImp1.getX()));
#endif
        linVelA += linImp0;
        angVelA += angImp0;
        linVelB += linImp1;
        angVelB += angImp1;
    }

    {   //	angular damping for point constraint
        b3Vector3 ab = ( posB - posA ).normalized();
        b3Vector3 ac = ( center - posA ).normalized();
        if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
        {
            float angNA = b3Dot( n, angVelA );
            float angNB = b3Dot( n, angVelB );

            angVelA -= (angNA*0.1f)*n;
            angVelB -= (angNB*0.1f)*n;
        }
    }

}
Example #10
0
int main(int argc, char* argv[])
{

	int sz = sizeof(b3Generic6DofConstraint);
	int sz2 = sizeof(b3Point2PointConstraint);
	int sz3 = sizeof(b3TypedConstraint);
	int sz4 = sizeof(b3TranslationalLimitMotor);
	int sz5 = sizeof(b3RotationalLimitMotor);
	int sz6 = sizeof(b3Transform);

	//b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache");


	b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile);
	b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile);


	b3SetCustomPrintfFunc(myprintf);
	b3Vector3 test=b3MakeVector3(1,2,3);
	test.x = 1;
	test.y = 4;

    printf("main start");

	b3CommandLineArgs args(argc,argv);
	ParticleDemo::ConstructionInfo ci;

	if (args.CheckCmdLineFlag("help"))
	{
		Usage();
		return 0;
	}

	selectedDemo =  loadCurrentDemoEntry(sStartFileName);


	args.GetCmdLineArgument("selected_demo",selectedDemo);


	if (args.CheckCmdLineFlag("new_batching"))
	{
		useNewBatchingKernel = true;
	}
	bool benchmark=args.CheckCmdLineFlag("benchmark");
	args.GetCmdLineArgument("max_framecount",maxFrameCount);

	args.GetCmdLineArgument("shadowmap_size",shadowMapWorldSize);

	args.GetCmdLineArgument("shadowmap_resolution",shadowMapWidth);
	shadowMapHeight=shadowMapWidth;
	if (args.CheckCmdLineFlag("disable_shadowmap"))
	{
		useShadowMap = false;
	}

	args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName);

	gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch");

	dump_timings=args.CheckCmdLineFlag("dump_timings");
	ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl");
	ci.m_useConcaveMesh = true;//args.CheckCmdLineFlag("use_concave_mesh");
	if (ci.m_useConcaveMesh)
	{
		enableExperimentalCpuConcaveCollision = true;
	}
	ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes");
	args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex);
	args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex);
	gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu");
	gUseLargeBatches = args.CheckCmdLineFlag("use_large_batches");

	gUseJacobi = args.CheckCmdLineFlag("use_jacobi");
	gUseDbvt = args.CheckCmdLineFlag("use_dbvt");
	gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats");
	gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu");
	gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu");
	gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu");
	gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu");
	useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid");




	args.GetCmdLineArgument("x_dim", ci.arraySizeX);
	args.GetCmdLineArgument("y_dim", ci.arraySizeY);
	args.GetCmdLineArgument("z_dim", ci.arraySizeZ);
	args.GetCmdLineArgument("x_gap", ci.gapX);
	args.GetCmdLineArgument("y_gap", ci.gapY);
	args.GetCmdLineArgument("z_gap", ci.gapZ);

	gPause = args.CheckCmdLineFlag("paused");

	gDebugForceLoadingFromSource = args.CheckCmdLineFlag("load_cl_kernels_from_disk");
	gDebugSkipLoadingBinary = args.CheckCmdLineFlag("disable_cached_cl_kernels");


#ifndef B3_NO_PROFILE
	b3ProfileManager::Reset();
#endif //B3_NO_PROFILE


	window = new b3gDefaultOpenGLWindow();

	b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight);

	window->createWindow(wci);
	window->setResizeCallback(MyResizeCallback);
	window->setMouseMoveCallback(MyMouseMoveCallback);
	window->setMouseButtonCallback(MyMouseButtonCallback);
	window->setKeyboardCallback(MyKeyboardCallback);

	window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org");
	printf("-----------------------------------------------------\n");




#ifndef __APPLE__
	glewInit();
#endif

	gui = new GwenUserInterface();

    printf("started GwenUserInterface");


	GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight);

	stash = initFont(&prim);


	if (gui)
	{
		gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale());

		printf("init fonts");


		gui->setToggleButtonCallback(MyButtonCallback);

		gui->registerToggleButton(MYPAUSE,"Pause");
		gui->registerToggleButton(MYPROFILE,"Profile");
		gui->registerToggleButton(MYRESET,"Reset");

		int numItems = sizeof(allDemos)/sizeof(ParticleDemo::CreateFunc*);
		demoNames.clear();
		for (int i=0;i<numItems;i++)
		{
			GpuDemo* demo = allDemos[i]();
			demoNames.push_back(demo->getName());
			delete demo;
		}

		gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0],selectedDemo);
		gui->setComboBoxCallback(MyComboBoxCallback);
	}



	do
	{
		bool syncOnly = false;
		gReset = false;

			{
		GLint err;
		glEnable(GL_BLEND);
		err = glGetError();
		b3Assert(err==GL_NO_ERROR);
		glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
		glDisable(GL_DEPTH_TEST);
		err = glGetError();
		b3Assert(err==GL_NO_ERROR);
		window->startRendering();
		glClearColor(1,1,1,1);
		glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
		glEnable(GL_DEPTH_TEST);

		sth_begin_draw(stash);
		//sth_draw_text(stash, droidRegular,12.f, dx, dy-50, "How does this OpenGL True Type font look? ", &dx,width,height);
		int spacing = 0;//g_OpenGLHeight;
		float sx,sy,dx,dy,lh;
		sx = 0;
		sy = g_OpenGLHeight;
		dx = sx; dy = sy;
		//if (1)
		const char* msg[] = {"Please wait, initializing the OpenCL demo",
			"Please make sure to run the demo on a high-end discrete GPU with OpenCL support",
			"The first time it can take a bit longer to compile the OpenCL kernels.",
			"Check the console if it takes longer than 1 minute or if a demos has issues.",
			"Please share the full commandline output when reporting issues:",
			"App_Bullet3_OpenCL_Demos_* >> error.log",

			"",
			"",
#ifdef _DEBUG
			"Some of the demos load a large .obj file,",
			"please use an optimized build of this app for faster parsing",

			"",
			"",
#endif
			"You can press F1 to create a single screenshot,",
			"or press F2 toggle screenshot (useful to create movies)",
			"",
			"",
			"There are various command-line options such as --benchmark",
			"See http://github.com/erwincoumans/bullet3 for more information"
		};
		int fontSize = 68;

		int nummsg = sizeof(msg)/sizeof(const char*);
		for (int i=0;i<nummsg;i++)
		{
			char txt[512];
			sprintf(txt,"%s",msg[i]);
				//sth_draw_text(stash, droidRegular,i, 10, dy-spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight);
				sth_draw_text(stash, droidRegular,fontSize, 10, spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight);
				spacing+=fontSize;
			fontSize = 32;
		}

		sth_end_draw(stash);
		sth_flush_draw(stash);
		window->endRendering();
	}



	static bool once=true;


	//glClearColor(0.3f, 0.3f, 0.3f, 1.0f);
	glClearColor(1,1,1,1);
	glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT);

	window->setWheelCallback(b3DefaultWheelCallback);




	{
		GpuDemo* demo = allDemos[selectedDemo]();
		sDemo = demo;
//		demo->myinit();
		bool useGpu = false;


		//int maxObjectCapacity=128*1024;
		int maxObjectCapacity=1024*1024;
		maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10);

		{
		ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity);//render.getInstancingRenderer();
		ci.m_window = window;
		ci.m_gui = gui;
		ci.m_instancingRenderer->init();
		ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
		ci.m_instancingRenderer->InitShaders();
		ci.m_primRenderer = &prim;
//		render.init();
		}

		{
			demo->initPhysics(ci);
		}





		printf("-----------------------------------------------------\n");

		FILE* csvFile = 0;
		FILE* detailsFile = 0;

		if (benchmark)
		{

			char prefixFileName[1024];
			char csvFileName[1024];
			char detailsFileName[1024];

			b3OpenCLDeviceInfo info;
			b3OpenCLUtils::getDeviceInfo(demo->getInternalData()->m_clDevice,&info);

			//todo: move this time stuff into the Platform/Window class
#ifdef _WIN32
			SYSTEMTIME time;
			GetLocalTime(&time);
			char buf[1024];
			DWORD dwCompNameLen = 1024;
			if (0 != GetComputerName(buf, &dwCompNameLen))
			{
				printf("%s", buf);
			} else
			{
				printf("unknown", buf);
			}

			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond);

#else
			timeval now;
			gettimeofday(&now,0);

			struct tm* ptm;
			ptm = localtime (&now.tv_sec);
			char buf[1024];
#ifdef __APPLE__
			sprintf(buf,"MacOSX");
#else
			sprintf(buf,"Unix");
#endif
			sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,
					ptm->tm_mday,
					ptm->tm_mon+1,
					ptm->tm_year+1900,
					ptm->tm_hour,
					ptm->tm_min,
					ptm->tm_sec);

#endif

			sprintf(csvFileName,"%s.csv",prefixFileName);
			sprintf(detailsFileName,"%s.txt",prefixFileName);
			printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName);

			//GetSystemTime(&time2);

			csvFile=fopen(csvFileName,"w");
			detailsFile = fopen(detailsFileName,"w");
			if (detailsFile)
				defaultOutput = detailsFile;

			//if (f)
			//	fprintf(f,"%s (%dx%dx%d=%d),\n",  g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ);
		}


		fprintf(defaultOutput,"Demo settings:\n");
		fprintf(defaultOutput,"  SelectedDemo=%d, demoname = %s\n", selectedDemo, demo->getName());
		fprintf(defaultOutput,"  x_dim=%d, y_dim=%d, z_dim=%d\n",ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ);
		fprintf(defaultOutput,"  x_gap=%f, y_gap=%f, z_gap=%f\n",ci.gapX,ci.gapY,ci.gapZ);
		fprintf(defaultOutput,"\nOpenCL settings:\n");
		fprintf(defaultOutput,"  Preferred cl_device index %d\n", ci.preferredOpenCLDeviceIndex);
		fprintf(defaultOutput,"  Preferred cl_platform index%d\n", ci.preferredOpenCLPlatformIndex);
		fprintf(defaultOutput,"\n");

		if (demo->getInternalData()->m_platformId)
		{
			b3OpenCLUtils::printPlatformInfo( demo->getInternalData()->m_platformId);
			fprintf(defaultOutput,"\n");
			b3OpenCLUtils::printDeviceInfo( demo->getInternalData()->m_clDevice);
			fprintf(defaultOutput,"\n");
		}
		do
		{


			GLint err = glGetError();
			assert(err==GL_NO_ERROR);


			if (exportFrame || exportMovie)
			{

				if (!renderTexture)
				{
					renderTexture = new GLRenderToTexture();
					GLuint renderTextureId;
					glGenTextures(1, &renderTextureId);

					// "Bind" the newly created texture : all future texture functions will modify this texture
					glBindTexture(GL_TEXTURE_2D, renderTextureId);

					// Give an empty image to OpenGL ( the last "0" )
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0);
					//glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);
					glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0);

					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
					glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
					//glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);

					renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR);
				}

				bool result = renderTexture->enable();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			b3ProfileManager::Reset();
			b3ProfileManager::Increment_Frame_Counter();

//			render.reshape(g_OpenGLWidth,g_OpenGLHeight);
			ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight);
			prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			window->startRendering();

			err = glGetError();
			assert(err==GL_NO_ERROR);

			glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT);
			glEnable(GL_DEPTH_TEST);

			err = glGetError();
			assert(err==GL_NO_ERROR);

			if (!gPause)
			{
				B3_PROFILE("clientMoveAndDisplay");

				demo->clientMoveAndDisplay();
			}
			else
			{

			}

			{
				B3_PROFILE("renderScene");
				demo->renderScene();
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			/*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects())
			{
				B3_PROFILE("renderPhysicsWorld");
				b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray();
				b3CollisionObject** colObjArray = &arr[0];

				render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly);
				syncOnly = true;

			}
			*/


			if (exportFrame || exportMovie)
			{

				char fileName[1024];
				sprintf(fileName,"screenShot%d.png",frameIndex++);
				writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName);
				exportFrame = false;
				renderTexture->disable();
			}


			{
				B3_PROFILE("gui->draw");
				if (gui && gDrawGui)
					gui->draw(g_OpenGLWidth,g_OpenGLHeight);
			}
			err = glGetError();
			assert(err==GL_NO_ERROR);


			{
				B3_PROFILE("window->endRendering");
				window->endRendering();
			}

			err = glGetError();
			assert(err==GL_NO_ERROR);

			{
				B3_PROFILE("glFinish");
			}



			if (dump_timings)
			{
				b3ProfileManager::dumpAll(stdout);
			}

			if (csvFile)
			{
				static int frameCount=0;

				if (frameCount>0)
				{
					DumpSimulationTime(csvFile);
					if (detailsFile)
					{
							fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount);
							b3ProfileManager::dumpAll(detailsFile);
					}
				}

				if (frameCount>=maxFrameCount)
					window->setRequestExit();
				frameCount++;
			}


			if (gStep)
				gPause=true;

		} while (!window->requestedExit() && !gReset);


		demo->exitPhysics();
		b3ProfileManager::CleanupMemory();
		delete ci.m_instancingRenderer;

		delete demo;
		sDemo = 0;

		if (detailsFile)
		{
			fclose(detailsFile);
			detailsFile=0;
		}
		if (csvFile)
		{
			fclose(csvFile);
			csvFile=0;
		}
	}



	} while (gReset);


	if (gui)
		gui->setComboBoxCallback(0);

	{



		delete gui;
		gui=0;

		exitFont();


		window->closeWindow();
		delete window;
		window = 0;

	}

	return 0;
}
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index)
{
	B3_PROFILE("solveContacts");
	m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
	m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
	m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts);

	if (optionalSortContactsDeterminism)
	{
		if (!gCpuSortContactsDeterminism)
		{
			B3_PROFILE("GPU Sort contact constraints (determinism)");

			m_data->m_pBufContactOutGPUCopy->resize(numContacts);
			m_data->m_contactKeyValues->resize(numContacts);

			m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0);

			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}
						
			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);
			
			{
				b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst(numContacts);
				launcher.launch1D( numContacts, 64 );
			}

			m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues);

			{
				B3_PROFILE("gpu reorderContactKernel (determinism)");
                                
				b3Int4 cdata;
				cdata.x = numContacts;
                                
				//b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
				//	, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
				b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
				launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL());
				launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL());
				launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL());
				launcher.setConst( cdata );
				launcher.launch1D( numContacts, 64 );
            }

		} else
		{
			B3_PROFILE("CPU Sort contact constraints (determinism)");
			b3AlignedObjectArray<b3Contact4> cpuConstraints;
			m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints);
			bool sort = true;
			if (sort)
			{
				cpuConstraints.quickSort(b3ContactCmp);

				for (int i=0;i<cpuConstraints.size();i++)
				{
					cpuConstraints[i].m_batchIdx = i;
				}
			}
			m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints);
			if (m_debugOutput==100)
			{
				for (int i=0;i<cpuConstraints.size();i++)
				{
					printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx);
				}
			}

			m_debugOutput++;
		}
	}
	



	int nContactOut = m_data->m_pBufContactOutGPU->size();

	bool useSolver = true;
	

    if (useSolver)
    {
        float dt=1./60.;
        b3ConstraintCfg csCfg( dt );
        csCfg.m_enableParallelSolve = true;
        csCfg.m_batchCellSize = 6;
        csCfg.m_staticIdx = static0Index;
        
        
        b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU;

        void* additionalData = 0;//m_data->m_frictionCGPU;
        const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU;
        b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
        int nContacts = nContactOut;
        
        
		int maxNumBatches = 0;
 
		if (!gUseLargeBatches)
        {
            
            if( m_data->m_solverGPU->m_contactBuffer2)
            {
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
            
            if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
            {
				m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
                m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
            }
			
            //clFinish(m_data->m_queue);
            
            
            
			{
				B3_PROFILE("batching");
				//@todo: just reserve it, without copy of original contact (unless we use warmstarting)



				const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf;


				{

					//b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
					//b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );

					const int sortAlignment = 512; // todo. get this out of sort
					if( csCfg.m_enableParallelSolve )
					{


						int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment );

						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;


						if (!gCpuSetSortData)
						{	//	2. set cell idx
							B3_PROFILE("GPU set cell idx");
							struct CB
							{
								int m_nContacts;
								int m_staticIdx;
								float m_scale;
								b3Int4 m_nSplit;
							};

							b3Assert( sortSize%64 == 0 );
							CB cdata;
							cdata.m_nContacts = nContacts;
							cdata.m_staticIdx = csCfg.m_staticIdx;
							cdata.m_scale = 1.f/csCfg.m_batchCellSize;
							cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X;
							cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y;
							cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z;

							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);


							b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
							b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" );
							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
							launcher.setConst( cdata.m_nContacts );
							launcher.setConst( cdata.m_scale );
							launcher.setConst(cdata.m_nSplit);
							launcher.setConst(cdata.m_staticIdx);


							launcher.launch1D( sortSize, 64 );
						} else
						{
							m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts);
							b3AlignedObjectArray<b3SortData> sortDataCPU;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU);

							b3AlignedObjectArray<b3Contact4> contactCPU;
							m_data->m_pBufContactOutGPU->copyToHost(contactCPU);
							b3AlignedObjectArray<b3RigidBodyData> bodiesCPU;
							bodyBuf->copyToHost(bodiesCPU);
							float scale = 1.f/csCfg.m_batchCellSize;
							b3Int4 nSplit;
							nSplit.x = B3_SOLVER_N_SPLIT_X;
							nSplit.y = B3_SOLVER_N_SPLIT_Y;
							nSplit.z = B3_SOLVER_N_SPLIT_Z;

							SetSortDataCPU(&contactCPU[0],  &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx);


							m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU);
						}



						if (!gCpuRadixSort)
						{	//	3. sort by cell idx
							B3_PROFILE("gpuRadixSort");
							//int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT;
							//int sortBit = 32;
							//if( n <= 0xffff ) sortBit = 16;
							//if( n <= 0xff ) sortBit = 8;
							//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
							//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut);



						} else
						{
							b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer);
							b3AlignedObjectArray<b3SortData> hostValues;
							keyValuesInOut.copyToHost(hostValues);
							hostValues.quickSort(sortfnc);
							keyValuesInOut.copyFromHost(hostValues);
						}


						if (gUseScanHost)
						{
							//	4. find entries
							B3_PROFILE("cpuBoundSearch");
							b3AlignedObjectArray<unsigned int> countsHost;
							countsNative->copyToHost(countsHost);

							b3AlignedObjectArray<b3SortData> sortDataHost;
							m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);


							//m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);

							countsNative->copyFromHost(countsHost);


							//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
							//	B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );

							//unsigned int sum;
							//m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
							b3AlignedObjectArray<unsigned int> offsetsHost;
							offsetsHost.resize(offsetsNative->size());


							m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum );
							offsetsNative->copyFromHost(offsetsHost);

							//printf("sum = %d\n",sum);
						}  else
						{
							//	4. find entries
							B3_PROFILE("gpuBoundSearch");
							m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT);
							m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum );
						} 




						if (nContacts)
						{	//	5. sort constraints by cellIdx
							if (gReorderContactsOnCpu)
							{
								B3_PROFILE("cpu m_reorderContactKernel");
								b3AlignedObjectArray<b3SortData> sortDataHost;
								m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost);
								b3AlignedObjectArray<b3Contact4> inContacts;
								b3AlignedObjectArray<b3Contact4> outContacts;
								m_data->m_pBufContactOutGPU->copyToHost(inContacts);
								outContacts.resize(inContacts.size());
								for (int i=0;i<nContacts;i++)
								{
									int srcIdx = sortDataHost[i].y;
									outContacts[i] = inContacts[srcIdx];
								}
								m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts);

								/*								"void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n"
								"{\n"
								"	int nContacts = cb.x;\n"
								"	int gIdx = GET_GLOBAL_IDX;\n"
								"	if( gIdx < nContacts )\n"
								"	{\n"
								"		int srcIdx = sortData[gIdx].y;\n"
								"		out[gIdx] = in[srcIdx];\n"
								"	}\n"
								"}\n"
								*/
							} else
							{
								B3_PROFILE("gpu m_reorderContactKernel");

								b3Int4 cdata;
								cdata.x = nContacts;

								b3BufferInfoCL bInfo[] = { 
									b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), 
									b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL())
									, b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) };

									b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel");
									launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
									launcher.setConst( cdata );
									launcher.launch1D( nContacts, 64 );
							}
						}




					}

				}

				//clFinish(m_data->m_queue);

				//				{
				//				b3AlignedObjectArray<unsigned int> histogram;
				//				m_data->m_solverGPU->m_numConstraints->copyToHost(histogram);
				//				printf(",,,\n");
				//				}


				if (nContacts)
				{

					if (gUseCpuCopyConstraints)
					{
						for (int i=0;i<nContacts;i++)
						{
							m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2);
							//							m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); 
							//						m_data->m_pBufContactOutGPU->getBufferCL() 
						}

					} else
					{
						B3_PROFILE("gpu m_copyConstraintKernel");
						b3Int4 cdata; cdata.x = nContacts;
						b3BufferInfoCL bInfo[] = { 
							b3BufferInfoCL(  m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), 
							b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) 
						};

						b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" );
						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
						launcher.setConst(  cdata );
						launcher.launch1D( nContacts, 64 );
						//we use the clFinish for proper benchmark/profile
						clFinish(m_data->m_queue);
					}
				}


				bool compareGPU = false;
				if (nContacts)
				{
					if (!gCpuBatchContacts)
					{
						B3_PROFILE("gpu batchContacts");
						maxNumBatches = 150;//250;
						m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx );
						clFinish(m_data->m_queue);
					} else
					{
						B3_PROFILE("cpu batchContacts");
						static b3AlignedObjectArray<b3Contact4> cpuContacts;
						b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
						{
							B3_PROFILE("copyToHost");
							contactsIn->copyToHost(cpuContacts);
						}
						b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
						b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;

						b3AlignedObjectArray<unsigned int> nNativeHost;
						b3AlignedObjectArray<unsigned int> offsetsNativeHost;

						{
							B3_PROFILE("countsNative/offsetsNative copyToHost");
							countsNative->copyToHost(nNativeHost);
							offsetsNative->copyToHost(offsetsNativeHost);
						}


						int numNonzeroGrid=0;

						if (gUseLargeBatches)
						{
							m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
							int totalNumConstraints = cpuContacts.size();
							int simdWidth =numBodies+1;//-1;//64;//-1;//32;
							int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
							maxNumBatches = b3Max(numBatches,maxNumBatches);
							static int globalMaxBatch = 0;
							if (maxNumBatches>globalMaxBatch )
							{
								globalMaxBatch  = maxNumBatches;
								b3Printf("maxNumBatches = %d\n",maxNumBatches);
							}
								
						} else
						{
							m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES);
							B3_PROFILE("cpu batch grid");
							for(int i=0; i<B3_SOLVER_N_CELLS; i++)
							{
								int n = (nNativeHost)[i];
								int offset = (offsetsNativeHost)[i];
								if( n )
								{
									numNonzeroGrid++;
									int simdWidth =numBodies+1;//-1;//64;//-1;//32;
									int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]);	//	on GPU
									maxNumBatches = b3Max(numBatches,maxNumBatches);
									static int globalMaxBatch = 0;
									if (maxNumBatches>globalMaxBatch )
									{
										globalMaxBatch  = maxNumBatches;
										b3Printf("maxNumBatches = %d\n",maxNumBatches);
									}
									//we use the clFinish for proper benchmark/profile
									
								}
							}
							//clFinish(m_data->m_queue);
						}
						{
							B3_PROFILE("m_contactBuffer->copyFromHost");
							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
						}

					} 

				}


			


			} 


		} 


			//printf("maxNumBatches = %d\n", maxNumBatches);

		if (gUseLargeBatches)
		{
			if (nContacts)
			{
				B3_PROFILE("cpu batchContacts");
				static b3AlignedObjectArray<b3Contact4> cpuContacts;
//				b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
				{
					B3_PROFILE("copyToHost");
					m_data->m_pBufContactOutGPU->copyToHost(cpuContacts);
				}
				b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
				b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets;



				int numNonzeroGrid=0;

				{
					m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES);
					int totalNumConstraints = cpuContacts.size();
					int simdWidth =numBodies+1;//-1;//64;//-1;//32;
					int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]);	//	on GPU
					maxNumBatches = b3Max(numBatches,maxNumBatches);
					static int globalMaxBatch = 0;
					if (maxNumBatches>globalMaxBatch )
					{
						globalMaxBatch  = maxNumBatches;
						b3Printf("maxNumBatches = %d\n",maxNumBatches);
					}
								
				}
				{
					B3_PROFILE("m_contactBuffer->copyFromHost");
					m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts);
				}

			} 

		}

		if (nContacts)
		{
			B3_PROFILE("gpu convertToConstraints");
			m_data->m_solverGPU->convertToConstraints( bodyBuf, 
				shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
				contactConstraintOut, 
				additionalData, nContacts, 
				(b3SolverBase::ConstraintCfg&) csCfg );
			clFinish(m_data->m_queue);
		}


		if (1)
		{
			int numIter = 4;

			m_data->m_solverGPU->m_nIterations = numIter;//10
			if (!gCpuSolveConstraint)
			{
				B3_PROFILE("GPU solveContactConstraint");

				/*m_data->m_solverGPU->solveContactConstraint(
				m_data->m_bodyBufferGPU, 
				m_data->m_inertiaBufferGPU,
				m_data->m_contactCGPU,0,
				nContactOut ,
				maxNumBatches);
				*/

				//m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes);

				if (gUseLargeBatches)
				{
					solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);
				} else
				{
					solveContactConstraint(
						m_data->m_bodyBufferGPU, 
						m_data->m_inertiaBufferGPU,
						m_data->m_contactCGPU,0,
						nContactOut ,
						maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu);
				}
			}
			else
			{
				B3_PROFILE("Host solveContactConstraint");

				m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes);
			}
            
            
        }
        
        
#if 0
        if (0)
        {
            B3_PROFILE("read body velocities back to CPU");
            //read body updated linear/angular velocities back to CPU
            m_data->m_bodyBufferGPU->read(
                                                  m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
            adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL );
        }
#endif
        
    }

}
GLInstanceGraphicsShape* btgCreateGraphicsShapeFromWavefrontObj(std::vector<tinyobj::shape_t>& shapes, bool flatShading)
{
	
	b3AlignedObjectArray<GLInstanceVertex>* vertices = new b3AlignedObjectArray<GLInstanceVertex>;
	{
		//		int numVertices = obj->vertexCount;
		//	int numIndices = 0;
		b3AlignedObjectArray<int>* indicesPtr = new b3AlignedObjectArray<int>;
		
		for (int s=0;s<(int)shapes.size();s++)
		{
			tinyobj::shape_t& shape = shapes[s];
			int faceCount = shape.mesh.indices.size();
			
			
			for (int f=0;f<faceCount;f+=3)
			{
				
				//btVector3 normal(face.m_plane[0],face.m_plane[1],face.m_plane[2]);
				if (1)
				{
					btVector3 normal(0,1,0);
					int vtxBaseIndex = vertices->size();
					
					
					
					GLInstanceVertex vtx0;
					vtx0.xyzw[0] = shape.mesh.positions[shape.mesh.indices[f]*3+0];
					vtx0.xyzw[1] = shape.mesh.positions[shape.mesh.indices[f]*3+1];
					vtx0.xyzw[2] = shape.mesh.positions[shape.mesh.indices[f]*3+2];
					vtx0.xyzw[3] = 0.f;
					
					if (shape.mesh.texcoords.size())
					{
						vtx0.uv[0] = shape.mesh.texcoords[shape.mesh.indices[f]*2+0];
						vtx0.uv[1] = shape.mesh.texcoords[shape.mesh.indices[f]*2+1];
					} else
					{
						vtx0.uv[0] = 0.5;
						vtx0.uv[1] = 0.5;
					}
					
					GLInstanceVertex vtx1;
					vtx1.xyzw[0] = shape.mesh.positions[shape.mesh.indices[f+1]*3+0];
					vtx1.xyzw[1] = shape.mesh.positions[shape.mesh.indices[f+1]*3+1];
					vtx1.xyzw[2] = shape.mesh.positions[shape.mesh.indices[f+1]*3+2];
					vtx1.xyzw[3]= 0.f;

					if (shape.mesh.texcoords.size())
					{
						vtx1.uv[0] = shape.mesh.texcoords[shape.mesh.indices[f+1]*2+0];
						vtx1.uv[1] = shape.mesh.texcoords[shape.mesh.indices[f+1]*2+1];
					} else
					{
						vtx1.uv[0] = 0.5f;
						vtx1.uv[1] = 0.5f;
					}
					
					GLInstanceVertex vtx2;
					vtx2.xyzw[0] = shape.mesh.positions[shape.mesh.indices[f+2]*3+0];
					vtx2.xyzw[1] = shape.mesh.positions[shape.mesh.indices[f+2]*3+1];
					vtx2.xyzw[2] = shape.mesh.positions[shape.mesh.indices[f+2]*3+2];
					vtx2.xyzw[3] = 0.f;
					if (shape.mesh.texcoords.size())
					{
						vtx2.uv[0] = shape.mesh.texcoords[shape.mesh.indices[f+2]*2+0];
						vtx2.uv[1] = shape.mesh.texcoords[shape.mesh.indices[f+2]*2+1];
					} else
					{
						vtx2.uv[0] = 0.5;
						vtx2.uv[1] = 0.5;
					}
					
					
					
					btVector3 v0(vtx0.xyzw[0],vtx0.xyzw[1],vtx0.xyzw[2]);
					btVector3 v1(vtx1.xyzw[0],vtx1.xyzw[1],vtx1.xyzw[2]);
					btVector3 v2(vtx2.xyzw[0],vtx2.xyzw[1],vtx2.xyzw[2]);
					
					unsigned int maxIndex = 0;
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f]*3+0);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f]*3+1);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f]*3+2);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f+1]*3+0);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f+1]*3+1);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f+1]*3+2);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f+2]*3+0);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f+2]*3+1);
					maxIndex = b3Max(maxIndex,shape.mesh.indices[f+2]*3+2);
					bool hasNormals = (shape.mesh.normals.size() && maxIndex<shape.mesh.normals.size() );
					
					if (flatShading || !hasNormals)
                    {
                        normal = (v1-v0).cross(v2-v0);
                        btScalar len2 = normal.length2();
                        //skip degenerate triangles
                        if (len2 > SIMD_EPSILON)
                        {
                            normal.normalize();
                        } else
                        {
                            normal.setValue(0,0,0);
                        }
                        vtx0.normal[0] = normal[0];
                        vtx0.normal[1] = normal[1];
                        vtx0.normal[2] = normal[2];
                        vtx1.normal[0] = normal[0];
                        vtx1.normal[1] = normal[1];
                        vtx1.normal[2] = normal[2];
                        vtx2.normal[0] = normal[0];
                        vtx2.normal[1] = normal[1];
                        vtx2.normal[2] = normal[2];
                    } else
                    {
                        
                        vtx0.normal[0] = shape.mesh.normals[shape.mesh.indices[f]*3+0];
                        vtx0.normal[1] = shape.mesh.normals[shape.mesh.indices[f]*3+1];
                        vtx0.normal[2] = shape.mesh.normals[shape.mesh.indices[f]*3+2]; //shape.mesh.indices[f+1]*3+0
                        vtx1.normal[0] = shape.mesh.normals[shape.mesh.indices[f+1]*3+0];
                        vtx1.normal[1] = shape.mesh.normals[shape.mesh.indices[f+1]*3+1];
                        vtx1.normal[2] = shape.mesh.normals[shape.mesh.indices[f+1]*3+2];
                        vtx2.normal[0] = shape.mesh.normals[shape.mesh.indices[f+2]*3+0];
                        vtx2.normal[1] = shape.mesh.normals[shape.mesh.indices[f+2]*3+1];
                        vtx2.normal[2] = shape.mesh.normals[shape.mesh.indices[f+2]*3+2];
                        
                        
                    }
                    vertices->push_back(vtx0);
                    vertices->push_back(vtx1);
                    vertices->push_back(vtx2);
                    indicesPtr->push_back(vtxBaseIndex);
                    indicesPtr->push_back(vtxBaseIndex+1);
                    indicesPtr->push_back(vtxBaseIndex+2);
                
                    
				}
			}
		}
		
		
		GLInstanceGraphicsShape* gfxShape = new GLInstanceGraphicsShape;
		gfxShape->m_vertices = vertices;
		gfxShape->m_numvertices = vertices->size();
		gfxShape->m_indices = indicesPtr;
		gfxShape->m_numIndices = indicesPtr->size();
		for (int i=0;i<4;i++)
			gfxShape->m_scaling[i] = 1;//bake the scaling into the vertices
		return gfxShape;
	}
}
Example #13
0
void b3CollideHulls(b3Manifold& manifold,
	const b3Transform& xf1, const b3HullShape* s1,
	const b3Transform& xf2, const b3HullShape* s2)
{
	B3_ASSERT(manifold.pointCount == 0);

	const b3Hull* hull1 = s1->m_hull;
	float32 r1 = s1->m_radius;

	const b3Hull* hull2 = s2->m_hull;
	float32 r2 = s2->m_radius;

	float32 totalRadius = r1 + r2;

	b3FaceQuery faceQuery1 = b3QueryFaceSeparation(xf1, hull1, xf2, hull2);
	if (faceQuery1.separation > totalRadius)
	{
		return;
	}

	b3FaceQuery faceQuery2 = b3QueryFaceSeparation(xf2, hull2, xf1, hull1);
	if (faceQuery2.separation > totalRadius)
	{
		return;
	}

	b3EdgeQuery edgeQuery = b3QueryEdgeSeparation(xf1, hull1, xf2, hull2);
	if (edgeQuery.separation > totalRadius)
	{
		return;
	}

	const float32 kTol = 0.1f * B3_LINEAR_SLOP;
	if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation) + kTol)
	{
		b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2);
	}
	else
	{
		if (faceQuery1.separation + kTol > faceQuery2.separation)
		{
			b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false);
		}
		else
		{
			b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true);
		}
	}

	// Heuristic succeded. 
	if (manifold.pointCount > 0)
	{
		return;
	}

	// Heuristic failed. Fallback.
	if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation))
	{
		b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2);
	}
	else
	{
		if (faceQuery1.separation > faceQuery2.separation)
		{
			b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false);
		}
		else
		{
			b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true);
		}
	}

	// When both convex hulls are not simplified clipping might fail and create no contact points.
	// For example, when a hull contains tiny faces, coplanar faces, and/or non-sharped edges.
	// So we simply create a contact point between the segments.
	// The hulls might overlap, but is better than solving no contact points.
	if (manifold.pointCount == 0)
	{
		b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2);
	}

	// If the shapes are overlapping then at least on point must be created.
	B3_ASSERT(manifold.pointCount > 0);
}