virtual bool fragment(Vec3f bar, TGAColor &color) { //B3_PROFILE("fragment"); Vec4f p = m_viewportMat*(varying_tri_light_view*bar); float depth = p[2]; p = p/p[3]; float index_x = b3Max(float(0.0), b3Min(float(m_width-1), p[0])); float index_y = b3Max(float(0.0), b3Min(float(m_height-1), p[1])); int idx = int(index_x) + int(index_y)*m_width; // index in the shadowbuffer array float shadow = 0.8+0.2*(m_shadowBuffer->at(idx)<-depth+0.05); // magic coeff to avoid z-fighting Vec3f bn = (varying_nrm*bar).normalize(); Vec2f uv = varying_uv*bar; Vec3f reflection_direction = (bn * (bn * m_light_dir_local * 2.f) - m_light_dir_local).normalize(); float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv)); float diffuse = b3Max(0.f, bn * m_light_dir_local); color = m_model->diffuse(uv); color[0] *= m_colorRGBA[0]; color[1] *= m_colorRGBA[1]; color[2] *= m_colorRGBA[2]; color[3] *= m_colorRGBA[3]; for (int i = 0; i < 3; ++i) { color[i] = b3Min(int(m_ambient_coefficient*color[i] + shadow*(m_diffuse_coefficient*diffuse+m_specular_coefficient*specular)*color[i]*m_light_color[i]), 255); } return false; }
virtual bool fragment(Vec3f bar, TGAColor &color) { Vec3f bn = (varying_nrm*bar).normalize(); Vec2f uv = varying_uv*bar; Vec3f reflection_direction = (bn * (bn * m_light_dir_local * 2.f) - m_light_dir_local).normalize(); float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv)); float diffuse = b3Max(0.f, bn * m_light_dir_local); float ambient_coefficient = 0.6; float diffuse_coefficient = 0.35; float specular_coefficient = 0.05; float intensity = ambient_coefficient + b3Min(diffuse * diffuse_coefficient + specular * specular_coefficient, 1.0f - ambient_coefficient); color = m_model->diffuse(uv) * intensity; //warning: bgra color is swapped to rgba to upload texture color.bgra[0] *= m_colorRGBA[0]; color.bgra[1] *= m_colorRGBA[1]; color.bgra[2] *= m_colorRGBA[2]; color.bgra[3] *= m_colorRGBA[3]; color.bgra[0] *= m_light_color[0]; color.bgra[1] *= m_light_color[1]; color.bgra[2] *= m_light_color[2]; return false; }
bool b3BroadPhase::Report(u32 proxyId) { if (proxyId == m_queryProxyId) { // The proxy can't overlap with itself. return true; } // Check capacity. if (m_pairCount == m_pairCapacity) { // Duplicate capacity. m_pairCapacity *= 2; b3Pair* oldPairs = m_pairs; m_pairs = (b3Pair*)b3Alloc(m_pairCapacity * sizeof(b3Pair)); memcpy(m_pairs, oldPairs, m_pairCount * sizeof(b3Pair)); b3Free(oldPairs); } // Add overlapping pair to the pair buffer. m_pairs[m_pairCount].proxy1 = b3Min(proxyId, m_queryProxyId); m_pairs[m_pairCount].proxy2 = b3Max(proxyId, m_queryProxyId); ++m_pairCount; // Keep looking for overlapping pairs. return true; }
bool b3BroadPhase::QueryCallback(i32 proxyId) { if (proxyId == m_queryProxyId) { // The proxy can't overlap with itself. return true; } // Check capacity. if (m_pairBufferCount == m_pairBufferCapacity) { // Duplicate capacity. m_pairBufferCapacity *= 2; b3Pair* oldPairBuffer = m_pairBuffer; m_pairBuffer = (b3Pair*)::b3Alloc(m_pairBufferCapacity * sizeof(b3Pair)); ::memcpy(m_pairBuffer, oldPairBuffer, m_pairBufferCount * sizeof(b3Pair)); ::b3Free(oldPairBuffer); } // Add overlapping pair to the pair buffer. m_pairBuffer[m_pairBufferCount].proxy1 = b3Min(proxyId, m_queryProxyId); m_pairBuffer[m_pairBufferCount].proxy2 = b3Max(proxyId, m_queryProxyId); ++m_pairBufferCount; // Keep looking for overlapping pairs. return true; }
virtual bool fragment(Vec3f bar, TGAColor &color) { Vec3f bn = (varying_nrm*bar).normalize(); Vec2f uv = varying_uv*bar; mat<3,3,float> A; A[0] = ndc_tri.col(1) - ndc_tri.col(0); A[1] = ndc_tri.col(2) - ndc_tri.col(0); A[2] = bn; mat<3,3,float> AI = A.invert(); Vec3f i = AI * Vec3f(varying_uv[0][1] - varying_uv[0][0], varying_uv[0][2] - varying_uv[0][0], 0); Vec3f j = AI * Vec3f(varying_uv[1][1] - varying_uv[1][0], varying_uv[1][2] - varying_uv[1][0], 0); mat<3,3,float> B; B.set_col(0, i.normalize()); B.set_col(1, j.normalize()); B.set_col(2, bn); Vec3f n = (B*m_model->normal(uv)).normalize(); float diff = b3Min(b3Max(0.f, n*m_light_dir_local+0.3f),1.f); //float diff = b3Max(0.f, n*m_light_dir_local); color = m_model->diffuse(uv)*diff; return false; }
static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3) { // It calculates possible 3 area constructed from random 4 points and returns the biggest one. b3Vector3 a[3],b[3]; a[0] = p0 - p1; a[1] = p0 - p2; a[2] = p0 - p3; b[0] = p2 - p3; b[1] = p1 - p3; b[2] = p1 - p2; //todo: Following 3 cross production can be easily optimized by SIMD. b3Vector3 tmp0 = a[0].cross(b[0]); b3Vector3 tmp1 = a[1].cross(b[1]); b3Vector3 tmp2 = a[2].cross(b[2]); return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2()); }
static inline void b3SolveContact(b3ContactConstraint4& cs, const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, float maxRambdaDt[4], float minRambdaDt[4]) { b3Vector3 dLinVelA; dLinVelA.setZero(); b3Vector3 dAngVelA; dAngVelA.setZero(); b3Vector3 dLinVelB; dLinVelB.setZero(); b3Vector3 dAngVelB; dAngVelB.setZero(); for(int ic=0; ic<4; ic++) { // dont necessary because this makes change to 0 if( cs.m_jacCoeffInv[ic] == 0.f ) continue; { b3Vector3 angular0, angular1, linear; b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; b3SetLinearAndAngular( (const b3Vector3 &)-cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 ); float rambdaDt = b3CalcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic]; rambdaDt *= cs.m_jacCoeffInv[ic]; { float prevSum = cs.m_appliedRambdaDt[ic]; float updated = prevSum; updated += rambdaDt; updated = b3Max( updated, minRambdaDt[ic] ); updated = b3Min( updated, maxRambdaDt[ic] ); rambdaDt = updated - prevSum; cs.m_appliedRambdaDt[ic] = updated; } b3Vector3 linImp0 = invMassA*linear*rambdaDt; b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; #ifdef _WIN32 b3Assert(_finite(linImp0.getX())); b3Assert(_finite(linImp1.getX())); #endif { linVelA += linImp0; angVelA += angImp0; linVelB += linImp1; angVelB += angImp1; } } } }
void solveContact3(b3GpuConstraint4* cs, b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA, b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB, b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB) { float minRambdaDt = 0; float maxRambdaDt = FLT_MAX; for(int ic=0; ic<4; ic++) { if( cs->m_jacCoeffInv[ic] == 0.f ) continue; b3Vector3 angular0, angular1, linear; b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr; b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr; setLinearAndAngular( cs->m_linear, r0, r1, linear, angular0, angular1 ); float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; rambdaDt *= cs->m_jacCoeffInv[ic]; { float prevSum = cs->m_appliedRambdaDt[ic]; float updated = prevSum; updated += rambdaDt; updated = b3Max( updated, minRambdaDt ); updated = b3Min( updated, maxRambdaDt ); rambdaDt = updated - prevSum; cs->m_appliedRambdaDt[ic] = updated; } b3Vector3 linImp0 = invMassA*linear*rambdaDt; b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; if (invMassA) { *dLinVelA += linImp0; *dAngVelA += angImp0; } if (invMassB) { *dLinVelB += linImp1; *dAngVelB += angImp1; } } }
static __inline void solveFriction(b3GpuConstraint4& cs, const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, float maxRambdaDt[4], float minRambdaDt[4]) { if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return; const b3Vector3& center = (const b3Vector3&)cs.m_center; b3Vector3 n = -(const b3Vector3&)cs.m_linear; b3Vector3 tangent[2]; #if 1 b3PlaneSpace1 (n, tangent[0],tangent[1]); #else b3Vector3 r = cs.m_worldPos[0]-center; tangent[0] = cross3( n, r ); tangent[1] = cross3( tangent[0], n ); tangent[0] = normalize3( tangent[0] ); tangent[1] = normalize3( tangent[1] ); #endif b3Vector3 angular0, angular1, linear; b3Vector3 r0 = center - posA; b3Vector3 r1 = center - posB; for(int i=0; i<2; i++) { setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 ); float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB ); rambdaDt *= cs.m_fJacCoeffInv[i]; { float prevSum = cs.m_fAppliedRambdaDt[i]; float updated = prevSum; updated += rambdaDt; updated = b3Max( updated, minRambdaDt[i] ); updated = b3Min( updated, maxRambdaDt[i] ); rambdaDt = updated - prevSum; cs.m_fAppliedRambdaDt[i] = updated; } b3Vector3 linImp0 = invMassA*linear*rambdaDt; b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; #ifdef _WIN32 b3Assert(_finite(linImp0.getX())); b3Assert(_finite(linImp1.getX())); #endif linVelA += linImp0; angVelA += angImp0; linVelB += linImp1; angVelB += angImp1; } { // angular damping for point constraint b3Vector3 ab = ( posB - posA ).normalized(); b3Vector3 ac = ( center - posA ).normalized(); if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) { float angNA = b3Dot( n, angVelA ); float angNB = b3Dot( n, angVelB ); angVelA -= (angNA*0.1f)*n; angVelB -= (angNB*0.1f)*n; } } }
int main(int argc, char* argv[]) { int sz = sizeof(b3Generic6DofConstraint); int sz2 = sizeof(b3Point2PointConstraint); int sz3 = sizeof(b3TypedConstraint); int sz4 = sizeof(b3TranslationalLimitMotor); int sz5 = sizeof(b3RotationalLimitMotor); int sz6 = sizeof(b3Transform); //b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache"); b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); b3SetCustomPrintfFunc(myprintf); b3Vector3 test=b3MakeVector3(1,2,3); test.x = 1; test.y = 4; printf("main start"); b3CommandLineArgs args(argc,argv); ParticleDemo::ConstructionInfo ci; if (args.CheckCmdLineFlag("help")) { Usage(); return 0; } selectedDemo = loadCurrentDemoEntry(sStartFileName); args.GetCmdLineArgument("selected_demo",selectedDemo); if (args.CheckCmdLineFlag("new_batching")) { useNewBatchingKernel = true; } bool benchmark=args.CheckCmdLineFlag("benchmark"); args.GetCmdLineArgument("max_framecount",maxFrameCount); args.GetCmdLineArgument("shadowmap_size",shadowMapWorldSize); args.GetCmdLineArgument("shadowmap_resolution",shadowMapWidth); shadowMapHeight=shadowMapWidth; if (args.CheckCmdLineFlag("disable_shadowmap")) { useShadowMap = false; } args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName); gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch"); dump_timings=args.CheckCmdLineFlag("dump_timings"); ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl"); ci.m_useConcaveMesh = true;//args.CheckCmdLineFlag("use_concave_mesh"); if (ci.m_useConcaveMesh) { enableExperimentalCpuConcaveCollision = true; } ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes"); args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex); args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex); gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu"); gUseLargeBatches = args.CheckCmdLineFlag("use_large_batches"); gUseJacobi = args.CheckCmdLineFlag("use_jacobi"); gUseDbvt = args.CheckCmdLineFlag("use_dbvt"); gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats"); gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu"); gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu"); gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu"); gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu"); useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid"); args.GetCmdLineArgument("x_dim", ci.arraySizeX); args.GetCmdLineArgument("y_dim", ci.arraySizeY); args.GetCmdLineArgument("z_dim", ci.arraySizeZ); args.GetCmdLineArgument("x_gap", ci.gapX); args.GetCmdLineArgument("y_gap", ci.gapY); args.GetCmdLineArgument("z_gap", ci.gapZ); gPause = args.CheckCmdLineFlag("paused"); gDebugForceLoadingFromSource = args.CheckCmdLineFlag("load_cl_kernels_from_disk"); gDebugSkipLoadingBinary = args.CheckCmdLineFlag("disable_cached_cl_kernels"); #ifndef B3_NO_PROFILE b3ProfileManager::Reset(); #endif //B3_NO_PROFILE window = new b3gDefaultOpenGLWindow(); b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight); window->createWindow(wci); window->setResizeCallback(MyResizeCallback); window->setMouseMoveCallback(MyMouseMoveCallback); window->setMouseButtonCallback(MyMouseButtonCallback); window->setKeyboardCallback(MyKeyboardCallback); window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org"); printf("-----------------------------------------------------\n"); #ifndef __APPLE__ glewInit(); #endif gui = new GwenUserInterface(); printf("started GwenUserInterface"); GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight); stash = initFont(&prim); if (gui) { gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale()); printf("init fonts"); gui->setToggleButtonCallback(MyButtonCallback); gui->registerToggleButton(MYPAUSE,"Pause"); gui->registerToggleButton(MYPROFILE,"Profile"); gui->registerToggleButton(MYRESET,"Reset"); int numItems = sizeof(allDemos)/sizeof(ParticleDemo::CreateFunc*); demoNames.clear(); for (int i=0;i<numItems;i++) { GpuDemo* demo = allDemos[i](); demoNames.push_back(demo->getName()); delete demo; } gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0],selectedDemo); gui->setComboBoxCallback(MyComboBoxCallback); } do { bool syncOnly = false; gReset = false; { GLint err; glEnable(GL_BLEND); err = glGetError(); b3Assert(err==GL_NO_ERROR); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glDisable(GL_DEPTH_TEST); err = glGetError(); b3Assert(err==GL_NO_ERROR); window->startRendering(); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); sth_begin_draw(stash); //sth_draw_text(stash, droidRegular,12.f, dx, dy-50, "How does this OpenGL True Type font look? ", &dx,width,height); int spacing = 0;//g_OpenGLHeight; float sx,sy,dx,dy,lh; sx = 0; sy = g_OpenGLHeight; dx = sx; dy = sy; //if (1) const char* msg[] = {"Please wait, initializing the OpenCL demo", "Please make sure to run the demo on a high-end discrete GPU with OpenCL support", "The first time it can take a bit longer to compile the OpenCL kernels.", "Check the console if it takes longer than 1 minute or if a demos has issues.", "Please share the full commandline output when reporting issues:", "App_Bullet3_OpenCL_Demos_* >> error.log", "", "", #ifdef _DEBUG "Some of the demos load a large .obj file,", "please use an optimized build of this app for faster parsing", "", "", #endif "You can press F1 to create a single screenshot,", "or press F2 toggle screenshot (useful to create movies)", "", "", "There are various command-line options such as --benchmark", "See http://github.com/erwincoumans/bullet3 for more information" }; int fontSize = 68; int nummsg = sizeof(msg)/sizeof(const char*); for (int i=0;i<nummsg;i++) { char txt[512]; sprintf(txt,"%s",msg[i]); //sth_draw_text(stash, droidRegular,i, 10, dy-spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight); sth_draw_text(stash, droidRegular,fontSize, 10, spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight); spacing+=fontSize; fontSize = 32; } sth_end_draw(stash); sth_flush_draw(stash); window->endRendering(); } static bool once=true; //glClearColor(0.3f, 0.3f, 0.3f, 1.0f); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); window->setWheelCallback(b3DefaultWheelCallback); { GpuDemo* demo = allDemos[selectedDemo](); sDemo = demo; // demo->myinit(); bool useGpu = false; //int maxObjectCapacity=128*1024; int maxObjectCapacity=1024*1024; maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10); { ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity);//render.getInstancingRenderer(); ci.m_window = window; ci.m_gui = gui; ci.m_instancingRenderer->init(); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->InitShaders(); ci.m_primRenderer = &prim; // render.init(); } { demo->initPhysics(ci); } printf("-----------------------------------------------------\n"); FILE* csvFile = 0; FILE* detailsFile = 0; if (benchmark) { char prefixFileName[1024]; char csvFileName[1024]; char detailsFileName[1024]; b3OpenCLDeviceInfo info; b3OpenCLUtils::getDeviceInfo(demo->getInternalData()->m_clDevice,&info); //todo: move this time stuff into the Platform/Window class #ifdef _WIN32 SYSTEMTIME time; GetLocalTime(&time); char buf[1024]; DWORD dwCompNameLen = 1024; if (0 != GetComputerName(buf, &dwCompNameLen)) { printf("%s", buf); } else { printf("unknown", buf); } sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond); #else timeval now; gettimeofday(&now,0); struct tm* ptm; ptm = localtime (&now.tv_sec); char buf[1024]; #ifdef __APPLE__ sprintf(buf,"MacOSX"); #else sprintf(buf,"Unix"); #endif sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ, ptm->tm_mday, ptm->tm_mon+1, ptm->tm_year+1900, ptm->tm_hour, ptm->tm_min, ptm->tm_sec); #endif sprintf(csvFileName,"%s.csv",prefixFileName); sprintf(detailsFileName,"%s.txt",prefixFileName); printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName); //GetSystemTime(&time2); csvFile=fopen(csvFileName,"w"); detailsFile = fopen(detailsFileName,"w"); if (detailsFile) defaultOutput = detailsFile; //if (f) // fprintf(f,"%s (%dx%dx%d=%d),\n", g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ); } fprintf(defaultOutput,"Demo settings:\n"); fprintf(defaultOutput," SelectedDemo=%d, demoname = %s\n", selectedDemo, demo->getName()); fprintf(defaultOutput," x_dim=%d, y_dim=%d, z_dim=%d\n",ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ); fprintf(defaultOutput," x_gap=%f, y_gap=%f, z_gap=%f\n",ci.gapX,ci.gapY,ci.gapZ); fprintf(defaultOutput,"\nOpenCL settings:\n"); fprintf(defaultOutput," Preferred cl_device index %d\n", ci.preferredOpenCLDeviceIndex); fprintf(defaultOutput," Preferred cl_platform index%d\n", ci.preferredOpenCLPlatformIndex); fprintf(defaultOutput,"\n"); if (demo->getInternalData()->m_platformId) { b3OpenCLUtils::printPlatformInfo( demo->getInternalData()->m_platformId); fprintf(defaultOutput,"\n"); b3OpenCLUtils::printDeviceInfo( demo->getInternalData()->m_clDevice); fprintf(defaultOutput,"\n"); } do { GLint err = glGetError(); assert(err==GL_NO_ERROR); if (exportFrame || exportMovie) { if (!renderTexture) { renderTexture = new GLRenderToTexture(); GLuint renderTextureId; glGenTextures(1, &renderTextureId); // "Bind" the newly created texture : all future texture functions will modify this texture glBindTexture(GL_TEXTURE_2D, renderTextureId); // Give an empty image to OpenGL ( the last "0" ) //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0); //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR); } bool result = renderTexture->enable(); } err = glGetError(); assert(err==GL_NO_ERROR); b3ProfileManager::Reset(); b3ProfileManager::Increment_Frame_Counter(); // render.reshape(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight); err = glGetError(); assert(err==GL_NO_ERROR); window->startRendering(); err = glGetError(); assert(err==GL_NO_ERROR); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); err = glGetError(); assert(err==GL_NO_ERROR); if (!gPause) { B3_PROFILE("clientMoveAndDisplay"); demo->clientMoveAndDisplay(); } else { } { B3_PROFILE("renderScene"); demo->renderScene(); } err = glGetError(); assert(err==GL_NO_ERROR); /*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects()) { B3_PROFILE("renderPhysicsWorld"); b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray(); b3CollisionObject** colObjArray = &arr[0]; render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly); syncOnly = true; } */ if (exportFrame || exportMovie) { char fileName[1024]; sprintf(fileName,"screenShot%d.png",frameIndex++); writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName); exportFrame = false; renderTexture->disable(); } { B3_PROFILE("gui->draw"); if (gui && gDrawGui) gui->draw(g_OpenGLWidth,g_OpenGLHeight); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("window->endRendering"); window->endRendering(); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("glFinish"); } if (dump_timings) { b3ProfileManager::dumpAll(stdout); } if (csvFile) { static int frameCount=0; if (frameCount>0) { DumpSimulationTime(csvFile); if (detailsFile) { fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount); b3ProfileManager::dumpAll(detailsFile); } } if (frameCount>=maxFrameCount) window->setRequestExit(); frameCount++; } if (gStep) gPause=true; } while (!window->requestedExit() && !gReset); demo->exitPhysics(); b3ProfileManager::CleanupMemory(); delete ci.m_instancingRenderer; delete demo; sDemo = 0; if (detailsFile) { fclose(detailsFile); detailsFile=0; } if (csvFile) { fclose(csvFile); csvFile=0; } } } while (gReset); if (gui) gui->setComboBoxCallback(0); { delete gui; gui=0; exitFont(); window->closeWindow(); delete window; window = 0; } return 0; }
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) { B3_PROFILE("solveContacts"); m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies); m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies); m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts); if (optionalSortContactsDeterminism) { if (!gCpuSortContactsDeterminism) { B3_PROFILE("GPU Sort contact constraints (determinism)"); m_data->m_pBufContactOutGPUCopy->resize(numContacts); m_data->m_contactKeyValues->resize(numContacts); m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { B3_PROFILE("gpu reorderContactKernel (determinism)"); b3Int4 cdata; cdata.x = numContacts; //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst( cdata ); launcher.launch1D( numContacts, 64 ); } } else { B3_PROFILE("CPU Sort contact constraints (determinism)"); b3AlignedObjectArray<b3Contact4> cpuConstraints; m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints); bool sort = true; if (sort) { cpuConstraints.quickSort(b3ContactCmp); for (int i=0;i<cpuConstraints.size();i++) { cpuConstraints[i].m_batchIdx = i; } } m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints); if (m_debugOutput==100) { for (int i=0;i<cpuConstraints.size();i++) { printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx); } } m_debugOutput++; } } int nContactOut = m_data->m_pBufContactOutGPU->size(); bool useSolver = true; if (useSolver) { float dt=1./60.; b3ConstraintCfg csCfg( dt ); csCfg.m_enableParallelSolve = true; csCfg.m_batchCellSize = 6; csCfg.m_staticIdx = static0Index; b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU; void* additionalData = 0;//m_data->m_frictionCGPU; const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU; b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU; int nContacts = nContactOut; int maxNumBatches = 0; if (!gUseLargeBatches) { if( m_data->m_solverGPU->m_contactBuffer2) { m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } if( m_data->m_solverGPU->m_contactBuffer2 == 0 ) { m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts ); m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } //clFinish(m_data->m_queue); { B3_PROFILE("batching"); //@todo: just reserve it, without copy of original contact (unless we use warmstarting) const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf; { //b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf ); //b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn ); const int sortAlignment = 512; // todo. get this out of sort if( csCfg.m_enableParallelSolve ) { int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment ); b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; if (!gCpuSetSortData) { // 2. set cell idx B3_PROFILE("GPU set cell idx"); struct CB { int m_nContacts; int m_staticIdx; float m_scale; b3Int4 m_nSplit; }; b3Assert( sortSize%64 == 0 ); CB cdata; cdata.m_nContacts = nContacts; cdata.m_staticIdx = csCfg.m_staticIdx; cdata.m_scale = 1.f/csCfg.m_batchCellSize; cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X; cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y; cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z; m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata.m_nContacts ); launcher.setConst( cdata.m_scale ); launcher.setConst(cdata.m_nSplit); launcher.setConst(cdata.m_staticIdx); launcher.launch1D( sortSize, 64 ); } else { m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3AlignedObjectArray<b3SortData> sortDataCPU; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU); b3AlignedObjectArray<b3Contact4> contactCPU; m_data->m_pBufContactOutGPU->copyToHost(contactCPU); b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; bodyBuf->copyToHost(bodiesCPU); float scale = 1.f/csCfg.m_batchCellSize; b3Int4 nSplit; nSplit.x = B3_SOLVER_N_SPLIT_X; nSplit.y = B3_SOLVER_N_SPLIT_Y; nSplit.z = B3_SOLVER_N_SPLIT_Z; SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx); m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU); } if (!gCpuRadixSort) { // 3. sort by cell idx B3_PROFILE("gpuRadixSort"); //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; //int sortBit = 32; //if( n <= 0xffff ) sortBit = 16; //if( n <= 0xff ) sortBit = 8; //adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); //adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut); } else { b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); b3AlignedObjectArray<b3SortData> hostValues; keyValuesInOut.copyToHost(hostValues); hostValues.quickSort(sortfnc); keyValuesInOut.copyFromHost(hostValues); } if (gUseScanHost) { // 4. find entries B3_PROFILE("cpuBoundSearch"); b3AlignedObjectArray<unsigned int> countsHost; countsNative->copyToHost(countsHost); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); countsNative->copyFromHost(countsHost); //adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); //unsigned int sum; //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); b3AlignedObjectArray<unsigned int> offsetsHost; offsetsHost.resize(offsetsNative->size()); m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum ); offsetsNative->copyFromHost(offsetsHost); //printf("sum = %d\n",sum); } else { // 4. find entries B3_PROFILE("gpuBoundSearch"); m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); } if (nContacts) { // 5. sort constraints by cellIdx if (gReorderContactsOnCpu) { B3_PROFILE("cpu m_reorderContactKernel"); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); b3AlignedObjectArray<b3Contact4> inContacts; b3AlignedObjectArray<b3Contact4> outContacts; m_data->m_pBufContactOutGPU->copyToHost(inContacts); outContacts.resize(inContacts.size()); for (int i=0;i<nContacts;i++) { int srcIdx = sortDataHost[i].y; outContacts[i] = inContacts[srcIdx]; } m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts); /* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" "{\n" " int nContacts = cb.x;\n" " int gIdx = GET_GLOBAL_IDX;\n" " if( gIdx < nContacts )\n" " {\n" " int srcIdx = sortData[gIdx].y;\n" " out[gIdx] = in[srcIdx];\n" " }\n" "}\n" */ } else { B3_PROFILE("gpu m_reorderContactKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); } } } } //clFinish(m_data->m_queue); // { // b3AlignedObjectArray<unsigned int> histogram; // m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); // printf(",,,\n"); // } if (nContacts) { if (gUseCpuCopyConstraints) { for (int i=0;i<nContacts;i++) { m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); // m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); // m_data->m_pBufContactOutGPU->getBufferCL() } } else { B3_PROFILE("gpu m_copyConstraintKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); //we use the clFinish for proper benchmark/profile clFinish(m_data->m_queue); } } bool compareGPU = false; if (nContacts) { if (!gCpuBatchContacts) { B3_PROFILE("gpu batchContacts"); maxNumBatches = 150;//250; m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); clFinish(m_data->m_queue); } else { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); contactsIn->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; b3AlignedObjectArray<unsigned int> nNativeHost; b3AlignedObjectArray<unsigned int> offsetsNativeHost; { B3_PROFILE("countsNative/offsetsNative copyToHost"); countsNative->copyToHost(nNativeHost); offsetsNative->copyToHost(offsetsNativeHost); } int numNonzeroGrid=0; if (gUseLargeBatches) { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } else { m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES); B3_PROFILE("cpu batch grid"); for(int i=0; i<B3_SOLVER_N_CELLS; i++) { int n = (nNativeHost)[i]; int offset = (offsetsNativeHost)[i]; if( n ) { numNonzeroGrid++; int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } //we use the clFinish for proper benchmark/profile } } //clFinish(m_data->m_queue); } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } } } //printf("maxNumBatches = %d\n", maxNumBatches); if (gUseLargeBatches) { if (nContacts) { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; // b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; int numNonzeroGrid=0; { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } if (nContacts) { B3_PROFILE("gpu convertToConstraints"); m_data->m_solverGPU->convertToConstraints( bodyBuf, shapeBuf, m_data->m_solverGPU->m_contactBuffer2, contactConstraintOut, additionalData, nContacts, (b3SolverBase::ConstraintCfg&) csCfg ); clFinish(m_data->m_queue); } if (1) { int numIter = 4; m_data->m_solverGPU->m_nIterations = numIter;//10 if (!gCpuSolveConstraint) { B3_PROFILE("GPU solveContactConstraint"); /*m_data->m_solverGPU->solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches); */ //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); if (gUseLargeBatches) { solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes); } else { solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu); } } else { B3_PROFILE("Host solveContactConstraint"); m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes); } } #if 0 if (0) { B3_PROFILE("read body velocities back to CPU"); //read body updated linear/angular velocities back to CPU m_data->m_bodyBufferGPU->read( m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL ); } #endif } }
GLInstanceGraphicsShape* btgCreateGraphicsShapeFromWavefrontObj(std::vector<tinyobj::shape_t>& shapes, bool flatShading) { b3AlignedObjectArray<GLInstanceVertex>* vertices = new b3AlignedObjectArray<GLInstanceVertex>; { // int numVertices = obj->vertexCount; // int numIndices = 0; b3AlignedObjectArray<int>* indicesPtr = new b3AlignedObjectArray<int>; for (int s=0;s<(int)shapes.size();s++) { tinyobj::shape_t& shape = shapes[s]; int faceCount = shape.mesh.indices.size(); for (int f=0;f<faceCount;f+=3) { //btVector3 normal(face.m_plane[0],face.m_plane[1],face.m_plane[2]); if (1) { btVector3 normal(0,1,0); int vtxBaseIndex = vertices->size(); GLInstanceVertex vtx0; vtx0.xyzw[0] = shape.mesh.positions[shape.mesh.indices[f]*3+0]; vtx0.xyzw[1] = shape.mesh.positions[shape.mesh.indices[f]*3+1]; vtx0.xyzw[2] = shape.mesh.positions[shape.mesh.indices[f]*3+2]; vtx0.xyzw[3] = 0.f; if (shape.mesh.texcoords.size()) { vtx0.uv[0] = shape.mesh.texcoords[shape.mesh.indices[f]*2+0]; vtx0.uv[1] = shape.mesh.texcoords[shape.mesh.indices[f]*2+1]; } else { vtx0.uv[0] = 0.5; vtx0.uv[1] = 0.5; } GLInstanceVertex vtx1; vtx1.xyzw[0] = shape.mesh.positions[shape.mesh.indices[f+1]*3+0]; vtx1.xyzw[1] = shape.mesh.positions[shape.mesh.indices[f+1]*3+1]; vtx1.xyzw[2] = shape.mesh.positions[shape.mesh.indices[f+1]*3+2]; vtx1.xyzw[3]= 0.f; if (shape.mesh.texcoords.size()) { vtx1.uv[0] = shape.mesh.texcoords[shape.mesh.indices[f+1]*2+0]; vtx1.uv[1] = shape.mesh.texcoords[shape.mesh.indices[f+1]*2+1]; } else { vtx1.uv[0] = 0.5f; vtx1.uv[1] = 0.5f; } GLInstanceVertex vtx2; vtx2.xyzw[0] = shape.mesh.positions[shape.mesh.indices[f+2]*3+0]; vtx2.xyzw[1] = shape.mesh.positions[shape.mesh.indices[f+2]*3+1]; vtx2.xyzw[2] = shape.mesh.positions[shape.mesh.indices[f+2]*3+2]; vtx2.xyzw[3] = 0.f; if (shape.mesh.texcoords.size()) { vtx2.uv[0] = shape.mesh.texcoords[shape.mesh.indices[f+2]*2+0]; vtx2.uv[1] = shape.mesh.texcoords[shape.mesh.indices[f+2]*2+1]; } else { vtx2.uv[0] = 0.5; vtx2.uv[1] = 0.5; } btVector3 v0(vtx0.xyzw[0],vtx0.xyzw[1],vtx0.xyzw[2]); btVector3 v1(vtx1.xyzw[0],vtx1.xyzw[1],vtx1.xyzw[2]); btVector3 v2(vtx2.xyzw[0],vtx2.xyzw[1],vtx2.xyzw[2]); unsigned int maxIndex = 0; maxIndex = b3Max(maxIndex,shape.mesh.indices[f]*3+0); maxIndex = b3Max(maxIndex,shape.mesh.indices[f]*3+1); maxIndex = b3Max(maxIndex,shape.mesh.indices[f]*3+2); maxIndex = b3Max(maxIndex,shape.mesh.indices[f+1]*3+0); maxIndex = b3Max(maxIndex,shape.mesh.indices[f+1]*3+1); maxIndex = b3Max(maxIndex,shape.mesh.indices[f+1]*3+2); maxIndex = b3Max(maxIndex,shape.mesh.indices[f+2]*3+0); maxIndex = b3Max(maxIndex,shape.mesh.indices[f+2]*3+1); maxIndex = b3Max(maxIndex,shape.mesh.indices[f+2]*3+2); bool hasNormals = (shape.mesh.normals.size() && maxIndex<shape.mesh.normals.size() ); if (flatShading || !hasNormals) { normal = (v1-v0).cross(v2-v0); btScalar len2 = normal.length2(); //skip degenerate triangles if (len2 > SIMD_EPSILON) { normal.normalize(); } else { normal.setValue(0,0,0); } vtx0.normal[0] = normal[0]; vtx0.normal[1] = normal[1]; vtx0.normal[2] = normal[2]; vtx1.normal[0] = normal[0]; vtx1.normal[1] = normal[1]; vtx1.normal[2] = normal[2]; vtx2.normal[0] = normal[0]; vtx2.normal[1] = normal[1]; vtx2.normal[2] = normal[2]; } else { vtx0.normal[0] = shape.mesh.normals[shape.mesh.indices[f]*3+0]; vtx0.normal[1] = shape.mesh.normals[shape.mesh.indices[f]*3+1]; vtx0.normal[2] = shape.mesh.normals[shape.mesh.indices[f]*3+2]; //shape.mesh.indices[f+1]*3+0 vtx1.normal[0] = shape.mesh.normals[shape.mesh.indices[f+1]*3+0]; vtx1.normal[1] = shape.mesh.normals[shape.mesh.indices[f+1]*3+1]; vtx1.normal[2] = shape.mesh.normals[shape.mesh.indices[f+1]*3+2]; vtx2.normal[0] = shape.mesh.normals[shape.mesh.indices[f+2]*3+0]; vtx2.normal[1] = shape.mesh.normals[shape.mesh.indices[f+2]*3+1]; vtx2.normal[2] = shape.mesh.normals[shape.mesh.indices[f+2]*3+2]; } vertices->push_back(vtx0); vertices->push_back(vtx1); vertices->push_back(vtx2); indicesPtr->push_back(vtxBaseIndex); indicesPtr->push_back(vtxBaseIndex+1); indicesPtr->push_back(vtxBaseIndex+2); } } } GLInstanceGraphicsShape* gfxShape = new GLInstanceGraphicsShape; gfxShape->m_vertices = vertices; gfxShape->m_numvertices = vertices->size(); gfxShape->m_indices = indicesPtr; gfxShape->m_numIndices = indicesPtr->size(); for (int i=0;i<4;i++) gfxShape->m_scaling[i] = 1;//bake the scaling into the vertices return gfxShape; } }
void b3CollideHulls(b3Manifold& manifold, const b3Transform& xf1, const b3HullShape* s1, const b3Transform& xf2, const b3HullShape* s2) { B3_ASSERT(manifold.pointCount == 0); const b3Hull* hull1 = s1->m_hull; float32 r1 = s1->m_radius; const b3Hull* hull2 = s2->m_hull; float32 r2 = s2->m_radius; float32 totalRadius = r1 + r2; b3FaceQuery faceQuery1 = b3QueryFaceSeparation(xf1, hull1, xf2, hull2); if (faceQuery1.separation > totalRadius) { return; } b3FaceQuery faceQuery2 = b3QueryFaceSeparation(xf2, hull2, xf1, hull1); if (faceQuery2.separation > totalRadius) { return; } b3EdgeQuery edgeQuery = b3QueryEdgeSeparation(xf1, hull1, xf2, hull2); if (edgeQuery.separation > totalRadius) { return; } const float32 kTol = 0.1f * B3_LINEAR_SLOP; if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation) + kTol) { b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2); } else { if (faceQuery1.separation + kTol > faceQuery2.separation) { b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false); } else { b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true); } } // Heuristic succeded. if (manifold.pointCount > 0) { return; } // Heuristic failed. Fallback. if (edgeQuery.separation > b3Max(faceQuery1.separation, faceQuery2.separation)) { b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2); } else { if (faceQuery1.separation > faceQuery2.separation) { b3BuildFaceContact(manifold, xf1, faceQuery1.index, s1, xf2, s2, false); } else { b3BuildFaceContact(manifold, xf2, faceQuery2.index, s2, xf1, s1, true); } } // When both convex hulls are not simplified clipping might fail and create no contact points. // For example, when a hull contains tiny faces, coplanar faces, and/or non-sharped edges. // So we simply create a contact point between the segments. // The hulls might overlap, but is better than solving no contact points. if (manifold.pointCount == 0) { b3BuildEdgeContact(manifold, xf1, edgeQuery.index1, s1, xf2, edgeQuery.index2, s2); } // If the shapes are overlapping then at least on point must be created. B3_ASSERT(manifold.pointCount > 0); }