TinyRendererVisualShapeConverterInternalData() :m_upAxis(2), m_swWidth(START_WIDTH), m_swHeight(START_HEIGHT), m_rgbColorBuffer(START_WIDTH,START_HEIGHT,TGAImage::RGB) { m_depthBuffer.resize(m_swWidth*m_swHeight); m_segmentationMaskBuffer.resize(m_swWidth*m_swHeight,-1); }
void b3GeometryUtil::getVerticesFromPlaneEquations(const b3AlignedObjectArray<b3Vector3>& planeEquations , b3AlignedObjectArray<b3Vector3>& verticesOut ) { const int numbrushes = planeEquations.size(); // brute force: for (int i=0;i<numbrushes;i++) { const b3Vector3& N1 = planeEquations[i]; for (int j=i+1;j<numbrushes;j++) { const b3Vector3& N2 = planeEquations[j]; for (int k=j+1;k<numbrushes;k++) { const b3Vector3& N3 = planeEquations[k]; b3Vector3 n2n3; n2n3 = N2.cross(N3); b3Vector3 n3n1; n3n1 = N3.cross(N1); b3Vector3 n1n2; n1n2 = N1.cross(N2); if ( ( n2n3.length2() > b3Scalar(0.0001) ) && ( n3n1.length2() > b3Scalar(0.0001) ) && ( n1n2.length2() > b3Scalar(0.0001) ) ) { //point P out of 3 plane equations: // d1 ( N2 * N3 ) + d2 ( N3 * N1 ) + d3 ( N1 * N2 ) //P = ------------------------------------------------------------------------- // N1 . ( N2 * N3 ) b3Scalar quotient = (N1.dot(n2n3)); if (b3Fabs(quotient) > b3Scalar(0.000001)) { quotient = b3Scalar(-1.) / quotient; n2n3 *= N1[3]; n3n1 *= N2[3]; n1n2 *= N3[3]; b3Vector3 potentialVertex = n2n3; potentialVertex += n3n1; potentialVertex += n1n2; potentialVertex *= quotient; //check if inside, and replace supportingVertexOut if needed if (isPointInsidePlanes(planeEquations,potentialVertex,b3Scalar(0.01))) { verticesOut.push_back(potentialVertex); } } } } } } }
///todo: add some acceleration structure (AABBs, tree etc) void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); B3_PROFILE("castRaysGPU"); b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q); b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q); { B3_PROFILE("raycast copyFromHost"); gpuRays.copyFromHost(rays); gpuHitResults.resize(hitResults.size()); gpuHitResults.copyFromHost(hitResults); } //run kernel { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(gpuRays.getBufferCL()); launcher.setBuffer(gpuHitResults.getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } //copy results { B3_PROFILE("raycast copyToHost"); gpuHitResults.copyToHost(hitResults); } }
virtual bool fragment(Vec3f bar, TGAColor &color) { //B3_PROFILE("fragment"); Vec4f p = m_viewportMat*(varying_tri_light_view*bar); float depth = p[2]; p = p/p[3]; float index_x = b3Max(float(0.0), b3Min(float(m_width-1), p[0])); float index_y = b3Max(float(0.0), b3Min(float(m_height-1), p[1])); int idx = int(index_x) + int(index_y)*m_width; // index in the shadowbuffer array float shadow = 0.8+0.2*(m_shadowBuffer->at(idx)<-depth+0.05); // magic coeff to avoid z-fighting Vec3f bn = (varying_nrm*bar).normalize(); Vec2f uv = varying_uv*bar; Vec3f reflection_direction = (bn * (bn * m_light_dir_local * 2.f) - m_light_dir_local).normalize(); float specular = pow(b3Max(reflection_direction.z, 0.f), m_model->specular(uv)); float diffuse = b3Max(0.f, bn * m_light_dir_local); color = m_model->diffuse(uv); color[0] *= m_colorRGBA[0]; color[1] *= m_colorRGBA[1]; color[2] *= m_colorRGBA[2]; color[3] *= m_colorRGBA[3]; for (int i = 0; i < 3; ++i) { color[i] = b3Min(int(m_ambient_coefficient*color[i] + shadow*(m_diffuse_coefficient*diffuse+m_specular_coefficient*specular)*color[i]*m_light_color[i]), 255); } return false; }
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables,const struct b3Collidable* collidables) { // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); B3_PROFILE("castRaysHost"); for (int r=0;r<rays.size();r++) { b3Vector3 rayFrom = rays[r].m_from; b3Vector3 rayTo = rays[r].m_to; //if there is a hit, color the pixels bool hits = false; for (int b=0;b<numBodies && !hits;b++) { const b3Vector3& pos = bodies[b].m_pos; const b3Quaternion& orn = bodies[b].m_quat; b3Scalar radius = 1; if (sphere_intersect(pos, radius, rayFrom, rayTo)) hits = true; } if (hits) hitResults[r].m_hitFraction = 0.f; } }
void b3GeometryUtil::getPlaneEquationsFromVertices(b3AlignedObjectArray<b3Vector3>& vertices, b3AlignedObjectArray<b3Vector3>& planeEquationsOut ) { const int numvertices = vertices.size(); // brute force: for (int i=0;i<numvertices;i++) { const b3Vector3& N1 = vertices[i]; for (int j=i+1;j<numvertices;j++) { const b3Vector3& N2 = vertices[j]; for (int k=j+1;k<numvertices;k++) { const b3Vector3& N3 = vertices[k]; b3Vector3 planeEquation,edge0,edge1; edge0 = N2-N1; edge1 = N3-N1; b3Scalar normalSign = b3Scalar(1.); for (int ww=0;ww<2;ww++) { planeEquation = normalSign * edge0.cross(edge1); if (planeEquation.length2() > b3Scalar(0.0001)) { planeEquation.normalize(); if (notExist(planeEquation,planeEquationsOut)) { planeEquation[3] = -planeEquation.dot(N1); //check if inside, and replace supportingVertexOut if needed if (areVerticesBehindPlane(planeEquation,vertices,b3Scalar(0.01))) { planeEquationsOut.push_back(planeEquation); } } } normalSign = b3Scalar(-1.); } } } } }
TinyRendererGUIHelper( int swWidth, int swHeight) : m_upAxis(1), m_swWidth(swWidth), m_swHeight(swHeight), m_rgbColorBuffer(swWidth,swHeight,TGAImage::RGB), m_colObjUniqueIndex(0) { m_depthBuffer.resize(swWidth*swHeight); }
static bool clipTriangleAgainstNearplane(const mat<4,3,float>& triangleIn, b3AlignedObjectArray<mat<4,3,float> >& clippedTrianglesOut) { //discard triangle if all vertices are behind near-plane if (triangleIn[3][0]<0 && triangleIn[3][1] <0 && triangleIn[3][2] <0) { return true; } //accept triangle if all vertices are in front of the near-plane if (triangleIn[3][0]>=0 && triangleIn[3][1] >=0 && triangleIn[3][2] >=0) { clippedTrianglesOut.push_back(triangleIn); return false; } Vec4f vtxCache[5]; b3AlignedObjectArray<Vec4f> vertices; vertices.initializeFromBuffer(vtxCache,0,5); clipEdge(triangleIn,0,1,vertices); clipEdge(triangleIn,1,2,vertices); clipEdge(triangleIn,2,0,vertices); if (vertices.size()<3) return true; if (equals(vertices[0],vertices[vertices.size()-1])) { vertices.pop_back(); } //create a fan of triangles for (int i=1;i<vertices.size()-1;i++) { mat<4,3,float>& vtx = clippedTrianglesOut.expand(); vtx.set_col(0,vertices[0]); vtx.set_col(1,vertices[i]); vtx.set_col(2,vertices[i+1]); } return true; }
void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyCL* bodies, int numCollidables, const struct b3Collidable* collidables) { B3_PROFILE("castRaysGPU"); b3OpenCLArray<b3RayInfo> gpuRays(m_data->m_context,m_data->m_q); gpuRays.copyFromHost(rays); b3OpenCLArray<b3RayHit> gpuHitResults(m_data->m_context,m_data->m_q); gpuHitResults.resize(hitResults.size()); b3OpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_q); gpuBodies.resize(numBodies); gpuBodies.copyFromHostPointer(bodies,numBodies); b3OpenCLArray<b3Collidable> gpuCollidables(m_data->m_context,m_data->m_q); gpuCollidables.resize(numCollidables); gpuCollidables.copyFromHostPointer(collidables,numCollidables); //run kernel { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(gpuRays.getBufferCL()); launcher.setBuffer(gpuHitResults.getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(gpuBodies.getBufferCL()); launcher.setBuffer(gpuCollidables.getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } //copy results gpuHitResults.copyToHost(hitResults); }
ParticleInternalData() : m_clPositionBuffer(0), m_velocitiesGPU(0), m_simParamGPU(0), m_updatePositionsKernel(0), m_updatePositionsKernel2(0), m_updateAabbsKernel(0), m_collideParticlesKernel(0) { m_simParamCPU.resize(1); }
SW_And_OpenGLGuiHelper(CommonGraphicsApp* glApp, bool useOpenGL2, int swWidth, int swHeight, GLPrimitiveRenderer* primRenderer) : OpenGLGuiHelper(glApp, useOpenGL2), m_swWidth(swWidth), m_swHeight(swHeight), m_rgbColorBuffer(swWidth, swHeight, TGAImage::RGB), m_primRenderer(primRenderer) { m_depthBuffer.resize(swWidth * swHeight); CommonRenderInterface* render = getRenderInterface(); m_image = new unsigned char[m_swWidth * m_swHeight * 4]; m_textureHandle = render->registerTexture(m_image, m_swWidth, m_swHeight); }
bool notExist(const b3Vector3& planeEquation,const b3AlignedObjectArray<b3Vector3>& planeEquations) { int numbrushes = planeEquations.size(); for (int i=0;i<numbrushes;i++) { const b3Vector3& N1 = planeEquations[i]; if (planeEquation.dot(N1) > b3Scalar(0.999)) { return false; } } return true; }
static void clipEdge(const mat<4, 3, float>& triangleIn, int vertexIndexA, int vertexIndexB, b3AlignedObjectArray<Vec4f>& vertices) { Vec4f v0New = triangleIn.col(vertexIndexA); Vec4f v1New = triangleIn.col(vertexIndexB); bool v0Inside = v0New[3] > 0.f && v0New[2] > -v0New[3]; bool v1Inside = v1New[3] > 0.f && v1New[2] > -v1New[3]; if (v0Inside && v1Inside) { } else if (v0Inside || v1Inside) { float d0 = v0New[2] + v0New[3]; float d1 = v1New[2] + v1New[3]; float factor = 1.0 / (d1 - d0); Vec4f newVertex = (v0New * d1 - v1New * d0) * factor; if (v0Inside) { v1New = newVertex; } else { v0New = newVertex; } } else { return; } if (vertices.size() == 0 || !(equals(vertices[vertices.size() - 1], v0New))) { vertices.push_back(v0New); } vertices.push_back(v1New); }
bool b3GeometryUtil::areVerticesBehindPlane(const b3Vector3& planeNormal, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar margin) { int numvertices = vertices.size(); for (int i=0;i<numvertices;i++) { const b3Vector3& N1 = vertices[i]; b3Scalar dist = b3Scalar(planeNormal.dot(N1))+b3Scalar(planeNormal[3])-margin; if (dist>b3Scalar(0.)) { return false; } } return true; }
TinyRendererSetupInternalData(int width, int height) :m_roll(0), m_pitch(0), m_yaw(0), m_width(width), m_height(height), m_rgbColorBuffer(width,height,TGAImage::RGB), m_textureHandle(0), m_animateRenderer(0) { m_depthBuffer.resize(m_width*m_height); }
bool b3GeometryUtil::isPointInsidePlanes(const b3AlignedObjectArray<b3Vector3>& planeEquations, const b3Vector3& point, b3Scalar margin) { int numbrushes = planeEquations.size(); for (int i=0;i<numbrushes;i++) { const b3Vector3& N1 = planeEquations[i]; b3Scalar dist = b3Scalar(N1.dot(point))+b3Scalar(N1[3])-margin; if (dist>b3Scalar(0.)) { return false; } } return true; }
void MyComboBoxCallback(int comboId, const char* item) { //printf("comboId = %d, item = %s\n",comboId, item); if (comboId==DEMO_SELECTION_COMBOBOX) { //find selected item for (int i=0;i<allNames.size();i++) { if (strcmp(item,allNames[i])==0) { selectDemo(i); saveCurrentSettings(sCurrentDemoIndex,startFileName); break; } } } }
void MyComboBoxCallback(int comboId, const char* item) { int numDemos = demoNames.size(); for (int i=0;i<numDemos;i++) { if (!strcmp(demoNames[i],item)) { if (selectedDemo != i) { gReset = true; selectedDemo = i; printf("selected demo %s!\n", item); } } } }
RenderInstancingDemo(CommonGraphicsApp* app) :m_app(app), m_x(0), m_y(0), m_z(0) { m_app->setUpAxis(2); { b3Vector3 extents=b3MakeVector3(100,100,100); extents[m_app->getUpAxis()]=1; int xres = 20; int yres = 20; b3Vector4 color0=b3MakeVector4(0.1, 0.1, 0.1,1); b3Vector4 color1=b3MakeVector4(0.6, 0.6, 0.6,1); m_app->registerGrid(xres, yres, color0, color1); } { int boxId = m_app->registerCubeShape(0.1,0.1,0.1); for (int i=-numCubesX/2;i<numCubesX/2;i++) { for (int j = -numCubesY/2;j<numCubesY/2;j++) { b3Vector3 pos=b3MakeVector3(i,j,j); pos[app->getUpAxis()] = 1; b3Quaternion orn(0,0,0,1); b3Vector4 color=b3MakeVector4(0.3,0.3,0.3,1); b3Vector3 scaling=b3MakeVector3(1,1,1); int instanceId = m_app->m_renderer->registerGraphicsInstance(boxId,pos,orn,color,scaling); m_movingInstances.push_back(instanceId); } } } m_app->m_renderer->writeTransforms(); }
int main(int argc, char* argv[]) { int sz = sizeof(b3Generic6DofConstraint); int sz2 = sizeof(b3Point2PointConstraint); int sz3 = sizeof(b3TypedConstraint); int sz4 = sizeof(b3TranslationalLimitMotor); int sz5 = sizeof(b3RotationalLimitMotor); int sz6 = sizeof(b3Transform); //b3OpenCLUtils::setCachePath("/Users/erwincoumans/develop/mycache"); b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); b3SetCustomPrintfFunc(myprintf); b3Vector3 test=b3MakeVector3(1,2,3); test.x = 1; test.y = 4; printf("main start"); b3CommandLineArgs args(argc,argv); ParticleDemo::ConstructionInfo ci; if (args.CheckCmdLineFlag("help")) { Usage(); return 0; } selectedDemo = loadCurrentDemoEntry(sStartFileName); args.GetCmdLineArgument("selected_demo",selectedDemo); if (args.CheckCmdLineFlag("new_batching")) { useNewBatchingKernel = true; } bool benchmark=args.CheckCmdLineFlag("benchmark"); args.GetCmdLineArgument("max_framecount",maxFrameCount); args.GetCmdLineArgument("shadowmap_size",shadowMapWorldSize); args.GetCmdLineArgument("shadowmap_resolution",shadowMapWidth); shadowMapHeight=shadowMapWidth; if (args.CheckCmdLineFlag("disable_shadowmap")) { useShadowMap = false; } args.GetCmdLineArgument("pair_benchmark_file",gPairBenchFileName); gDebugLauncherCL = args.CheckCmdLineFlag("debug_kernel_launch"); dump_timings=args.CheckCmdLineFlag("dump_timings"); ci.useOpenCL = !args.CheckCmdLineFlag("disable_opencl"); ci.m_useConcaveMesh = true;//args.CheckCmdLineFlag("use_concave_mesh"); if (ci.m_useConcaveMesh) { enableExperimentalCpuConcaveCollision = true; } ci.m_useInstancedCollisionShapes = !args.CheckCmdLineFlag("no_instanced_collision_shapes"); args.GetCmdLineArgument("cl_device", ci.preferredOpenCLDeviceIndex); args.GetCmdLineArgument("cl_platform", ci.preferredOpenCLPlatformIndex); gAllowCpuOpenCL = args.CheckCmdLineFlag("allow_opencl_cpu"); gUseLargeBatches = args.CheckCmdLineFlag("use_large_batches"); gUseJacobi = args.CheckCmdLineFlag("use_jacobi"); gUseDbvt = args.CheckCmdLineFlag("use_dbvt"); gDumpContactStats = args.CheckCmdLineFlag("dump_contact_stats"); gCalcWorldSpaceAabbOnCpu = args.CheckCmdLineFlag("calc_aabb_cpu"); gUseCalculateOverlappingPairsHost = args.CheckCmdLineFlag("calc_pairs_cpu"); gIntegrateOnCpu = args.CheckCmdLineFlag("integrate_cpu"); gConvertConstraintOnCpu = args.CheckCmdLineFlag("convert_constraints_cpu"); useUniformGrid = args.CheckCmdLineFlag("use_uniform_grid"); args.GetCmdLineArgument("x_dim", ci.arraySizeX); args.GetCmdLineArgument("y_dim", ci.arraySizeY); args.GetCmdLineArgument("z_dim", ci.arraySizeZ); args.GetCmdLineArgument("x_gap", ci.gapX); args.GetCmdLineArgument("y_gap", ci.gapY); args.GetCmdLineArgument("z_gap", ci.gapZ); gPause = args.CheckCmdLineFlag("paused"); gDebugForceLoadingFromSource = args.CheckCmdLineFlag("load_cl_kernels_from_disk"); gDebugSkipLoadingBinary = args.CheckCmdLineFlag("disable_cached_cl_kernels"); #ifndef B3_NO_PROFILE b3ProfileManager::Reset(); #endif //B3_NO_PROFILE window = new b3gDefaultOpenGLWindow(); b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight); window->createWindow(wci); window->setResizeCallback(MyResizeCallback); window->setMouseMoveCallback(MyMouseMoveCallback); window->setMouseButtonCallback(MyMouseButtonCallback); window->setKeyboardCallback(MyKeyboardCallback); window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org"); printf("-----------------------------------------------------\n"); #ifndef __APPLE__ glewInit(); #endif gui = new GwenUserInterface(); printf("started GwenUserInterface"); GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight); stash = initFont(&prim); if (gui) { gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale()); printf("init fonts"); gui->setToggleButtonCallback(MyButtonCallback); gui->registerToggleButton(MYPAUSE,"Pause"); gui->registerToggleButton(MYPROFILE,"Profile"); gui->registerToggleButton(MYRESET,"Reset"); int numItems = sizeof(allDemos)/sizeof(ParticleDemo::CreateFunc*); demoNames.clear(); for (int i=0;i<numItems;i++) { GpuDemo* demo = allDemos[i](); demoNames.push_back(demo->getName()); delete demo; } gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0],selectedDemo); gui->setComboBoxCallback(MyComboBoxCallback); } do { bool syncOnly = false; gReset = false; { GLint err; glEnable(GL_BLEND); err = glGetError(); b3Assert(err==GL_NO_ERROR); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glDisable(GL_DEPTH_TEST); err = glGetError(); b3Assert(err==GL_NO_ERROR); window->startRendering(); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); sth_begin_draw(stash); //sth_draw_text(stash, droidRegular,12.f, dx, dy-50, "How does this OpenGL True Type font look? ", &dx,width,height); int spacing = 0;//g_OpenGLHeight; float sx,sy,dx,dy,lh; sx = 0; sy = g_OpenGLHeight; dx = sx; dy = sy; //if (1) const char* msg[] = {"Please wait, initializing the OpenCL demo", "Please make sure to run the demo on a high-end discrete GPU with OpenCL support", "The first time it can take a bit longer to compile the OpenCL kernels.", "Check the console if it takes longer than 1 minute or if a demos has issues.", "Please share the full commandline output when reporting issues:", "App_Bullet3_OpenCL_Demos_* >> error.log", "", "", #ifdef _DEBUG "Some of the demos load a large .obj file,", "please use an optimized build of this app for faster parsing", "", "", #endif "You can press F1 to create a single screenshot,", "or press F2 toggle screenshot (useful to create movies)", "", "", "There are various command-line options such as --benchmark", "See http://github.com/erwincoumans/bullet3 for more information" }; int fontSize = 68; int nummsg = sizeof(msg)/sizeof(const char*); for (int i=0;i<nummsg;i++) { char txt[512]; sprintf(txt,"%s",msg[i]); //sth_draw_text(stash, droidRegular,i, 10, dy-spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight); sth_draw_text(stash, droidRegular,fontSize, 10, spacing, txt, &dx,g_OpenGLWidth,g_OpenGLHeight); spacing+=fontSize; fontSize = 32; } sth_end_draw(stash); sth_flush_draw(stash); window->endRendering(); } static bool once=true; //glClearColor(0.3f, 0.3f, 0.3f, 1.0f); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); window->setWheelCallback(b3DefaultWheelCallback); { GpuDemo* demo = allDemos[selectedDemo](); sDemo = demo; // demo->myinit(); bool useGpu = false; //int maxObjectCapacity=128*1024; int maxObjectCapacity=1024*1024; maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10); { ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity);//render.getInstancingRenderer(); ci.m_window = window; ci.m_gui = gui; ci.m_instancingRenderer->init(); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->InitShaders(); ci.m_primRenderer = &prim; // render.init(); } { demo->initPhysics(ci); } printf("-----------------------------------------------------\n"); FILE* csvFile = 0; FILE* detailsFile = 0; if (benchmark) { char prefixFileName[1024]; char csvFileName[1024]; char detailsFileName[1024]; b3OpenCLDeviceInfo info; b3OpenCLUtils::getDeviceInfo(demo->getInternalData()->m_clDevice,&info); //todo: move this time stuff into the Platform/Window class #ifdef _WIN32 SYSTEMTIME time; GetLocalTime(&time); char buf[1024]; DWORD dwCompNameLen = 1024; if (0 != GetComputerName(buf, &dwCompNameLen)) { printf("%s", buf); } else { printf("unknown", buf); } sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond); #else timeval now; gettimeofday(&now,0); struct tm* ptm; ptm = localtime (&now.tv_sec); char buf[1024]; #ifdef __APPLE__ sprintf(buf,"MacOSX"); #else sprintf(buf,"Unix"); #endif sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ, ptm->tm_mday, ptm->tm_mon+1, ptm->tm_year+1900, ptm->tm_hour, ptm->tm_min, ptm->tm_sec); #endif sprintf(csvFileName,"%s.csv",prefixFileName); sprintf(detailsFileName,"%s.txt",prefixFileName); printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName); //GetSystemTime(&time2); csvFile=fopen(csvFileName,"w"); detailsFile = fopen(detailsFileName,"w"); if (detailsFile) defaultOutput = detailsFile; //if (f) // fprintf(f,"%s (%dx%dx%d=%d),\n", g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ); } fprintf(defaultOutput,"Demo settings:\n"); fprintf(defaultOutput," SelectedDemo=%d, demoname = %s\n", selectedDemo, demo->getName()); fprintf(defaultOutput," x_dim=%d, y_dim=%d, z_dim=%d\n",ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ); fprintf(defaultOutput," x_gap=%f, y_gap=%f, z_gap=%f\n",ci.gapX,ci.gapY,ci.gapZ); fprintf(defaultOutput,"\nOpenCL settings:\n"); fprintf(defaultOutput," Preferred cl_device index %d\n", ci.preferredOpenCLDeviceIndex); fprintf(defaultOutput," Preferred cl_platform index%d\n", ci.preferredOpenCLPlatformIndex); fprintf(defaultOutput,"\n"); if (demo->getInternalData()->m_platformId) { b3OpenCLUtils::printPlatformInfo( demo->getInternalData()->m_platformId); fprintf(defaultOutput,"\n"); b3OpenCLUtils::printDeviceInfo( demo->getInternalData()->m_clDevice); fprintf(defaultOutput,"\n"); } do { GLint err = glGetError(); assert(err==GL_NO_ERROR); if (exportFrame || exportMovie) { if (!renderTexture) { renderTexture = new GLRenderToTexture(); GLuint renderTextureId; glGenTextures(1, &renderTextureId); // "Bind" the newly created texture : all future texture functions will modify this texture glBindTexture(GL_TEXTURE_2D, renderTextureId); // Give an empty image to OpenGL ( the last "0" ) //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0); //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR); } bool result = renderTexture->enable(); } err = glGetError(); assert(err==GL_NO_ERROR); b3ProfileManager::Reset(); b3ProfileManager::Increment_Frame_Counter(); // render.reshape(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight); err = glGetError(); assert(err==GL_NO_ERROR); window->startRendering(); err = glGetError(); assert(err==GL_NO_ERROR); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); err = glGetError(); assert(err==GL_NO_ERROR); if (!gPause) { B3_PROFILE("clientMoveAndDisplay"); demo->clientMoveAndDisplay(); } else { } { B3_PROFILE("renderScene"); demo->renderScene(); } err = glGetError(); assert(err==GL_NO_ERROR); /*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects()) { B3_PROFILE("renderPhysicsWorld"); b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray(); b3CollisionObject** colObjArray = &arr[0]; render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly); syncOnly = true; } */ if (exportFrame || exportMovie) { char fileName[1024]; sprintf(fileName,"screenShot%d.png",frameIndex++); writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName); exportFrame = false; renderTexture->disable(); } { B3_PROFILE("gui->draw"); if (gui && gDrawGui) gui->draw(g_OpenGLWidth,g_OpenGLHeight); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("window->endRendering"); window->endRendering(); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("glFinish"); } if (dump_timings) { b3ProfileManager::dumpAll(stdout); } if (csvFile) { static int frameCount=0; if (frameCount>0) { DumpSimulationTime(csvFile); if (detailsFile) { fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount); b3ProfileManager::dumpAll(detailsFile); } } if (frameCount>=maxFrameCount) window->setRequestExit(); frameCount++; } if (gStep) gPause=true; } while (!window->requestedExit() && !gReset); demo->exitPhysics(); b3ProfileManager::CleanupMemory(); delete ci.m_instancingRenderer; delete demo; sDemo = 0; if (detailsFile) { fclose(detailsFile); detailsFile=0; } if (csvFile) { fclose(csvFile); csvFile=0; } } } while (gReset); if (gui) gui->setComboBoxCallback(0); { delete gui; gui=0; exitFont(); window->closeWindow(); delete window; window = 0; } return 0; }
inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3(b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies) { //int sz = sizeof(b3BatchConstraint); B3_PROFILE("sortConstraintByBatch3"); static int maxSwaps = 0; int numSwaps = 0; curUsed.resize(2 * simdWidth); static int maxNumConstraints = 0; if (maxNumConstraints < numConstraints) { maxNumConstraints = numConstraints; //printf("maxNumConstraints = %d\n",maxNumConstraints ); } int numUsedArray = numBodies / 32 + 1; bodyUsed.resize(numUsedArray); for (int q = 0; q < numUsedArray; q++) bodyUsed[q] = 0; int curBodyUsed = 0; int numIter = 0; #if defined(_DEBUG) for (int i = 0; i < numConstraints; i++) cs[i].m_batchId = -1; #endif int numValidConstraints = 0; // int unprocessedConstraintIndex = 0; int batchIdx = 0; { B3_PROFILE("cpu batch innerloop"); while (numValidConstraints < numConstraints) { numIter++; int nCurrentBatch = 0; // clear flag for (int i = 0; i < curBodyUsed; i++) bodyUsed[curUsed[i] / 32] = 0; curBodyUsed = 0; for (int i = numValidConstraints; i < numConstraints; i++) { int idx = i; b3Assert(idx < numConstraints); // check if it can go int bodyAS = cs[idx].m_bodyAPtrAndSignBit; int bodyBS = cs[idx].m_bodyBPtrAndSignBit; int bodyA = abs(bodyAS); int bodyB = abs(bodyBS); bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; int aUnavailable = 0; int bUnavailable = 0; if (!aIsStatic) { aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31)); } if (!aUnavailable) if (!bIsStatic) { bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 31)); } if (aUnavailable == 0 && bUnavailable == 0) // ok { if (!aIsStatic) { bodyUsed[bodyA / 32] |= (1 << (bodyA & 31)); curUsed[curBodyUsed++] = bodyA; } if (!bIsStatic) { bodyUsed[bodyB / 32] |= (1 << (bodyB & 31)); curUsed[curBodyUsed++] = bodyB; } cs[idx].m_batchId = batchIdx; if (i != numValidConstraints) { b3Swap(cs[i], cs[numValidConstraints]); numSwaps++; } numValidConstraints++; { nCurrentBatch++; if (nCurrentBatch == simdWidth) { nCurrentBatch = 0; for (int i = 0; i < curBodyUsed; i++) bodyUsed[curUsed[i] / 32] = 0; curBodyUsed = 0; } } } } m_gpuData->m_batchSizes.push_back(nCurrentBatch); batchIdx++; } } #if defined(_DEBUG) // debugPrintf( "nBatches: %d\n", batchIdx ); for (int i = 0; i < numConstraints; i++) { b3Assert(cs[i].m_batchId != -1); } #endif if (maxSwaps < numSwaps) { maxSwaps = numSwaps; //printf("maxSwaps = %d\n", maxSwaps); } return batchIdx; }
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal) { //only create the batches once. //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); bool createBatches = m_gpuData->m_batchSizes.size() == 0; { if (createBatches) { m_gpuData->m_batchSizes.resize(0); { m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); B3_PROFILE("batch joints"); b3Assert(batchConstraints.size() == numConstraints); int simdWidth = numConstraints + 1; int numBodies = m_tmpSolverBodyPool.size(); sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies); m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); } } else { /*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches; m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches); b3Assert(cpuCheckBatches.size()==batchConstraints.size()); printf(".\n"); */ //>copyFromHost(batchConstraints); } int maxIterations = infoGlobal.m_numIterations; bool useBatching = true; if (useBatching) { if (!useGpuSolveJointConstraintRows) { B3_PROFILE("copy to host"); m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints); } for (int iteration = 0; iteration < maxIterations; iteration++) { int batchOffset = 0; int constraintOffset = 0; int numBatches = m_gpuData->m_batchSizes.size(); for (int bb = 0; bb < numBatches; bb++) { int numConstraintsInBatch = m_gpuData->m_batchSizes[bb]; if (useGpuSolveJointConstraintRows) { B3_PROFILE("solveJointConstraintRowsKernels"); /* __kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, __global b3BatchConstraint* batchConstraints, __global b3SolverConstraint* rows, __global unsigned int* numConstraintRowsInfo1, __global unsigned int* rowOffsets, __global b3GpuGenericConstraint* constraints, int batchOffset, int numConstraintsInBatch*/ b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels"); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(gpuConstraints1->getBufferCL()); //to detect disabled constraints launcher.setConst(batchOffset); launcher.setConst(numConstraintsInBatch); launcher.launch1D(numConstraintsInBatch); } else //useGpu { for (int b = 0; b < numConstraintsInBatch; b++) { const b3BatchConstraint& c = batchConstraints[batchOffset + b]; /*printf("-----------\n"); printf("bb=%d\n",bb); printf("c.batchId = %d\n", c.m_batchId); */ b3Assert(c.m_batchId == bb); b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex]; if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED) { int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex]; int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex]; for (int jj = 0; jj < numConstraintRows; jj++) { // b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj]; //resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); } } } } //useGpu batchOffset += numConstraintsInBatch; constraintOffset += numConstraintsInBatch; } } //for (int iteration... if (!useGpuSolveJointConstraintRows) { { B3_PROFILE("copy from host"); m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); } //B3_PROFILE("copy to host"); //m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } //int sz = sizeof(b3GpuSolverBody); //printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz); } else { for (int iteration = 0; iteration < maxIterations; iteration++) { int numJoints = m_tmpSolverNonContactConstraintPool.size(); for (int j = 0; j < numJoints; j++) { b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j]; resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); } if (!m_usePgs) { averageVelocities(); } } } } clFinish(m_gpuData->m_queue); return 0.f; }
b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) { B3_PROFILE("GPU solveGroupCacheFriendlySetup"); batchConstraints.resize(numConstraints); m_gpuData->m_gpuBatchConstraints->resize(numConstraints); m_staticIdx = -1; m_maxOverrideNumSolverIterations = 0; /* m_gpuData->m_gpuBodies->resize(numBodies); m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies); b3OpenCLArray<b3InertiaData> gpuInertias(m_gpuData->m_context,m_gpuData->m_queue); gpuInertias.resize(numBodies); gpuInertias.copyFromHostPointer(inertias,numBodies); */ m_gpuData->m_gpuSolverBodies->resize(numBodies); m_tmpSolverBodyPool.resize(numBodies); { if (useGpuInitSolverBodies) { B3_PROFILE("m_initSolverBodiesKernel"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initSolverBodiesKernel, "m_initSolverBodiesKernel"); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setConst(numBodies); launcher.launch1D(numBodies); clFinish(m_gpuData->m_queue); // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); } else { gpuBodies->copyToHost(m_gpuData->m_cpuBodies); for (int i = 0; i < numBodies; i++) { b3RigidBodyData& body = m_gpuData->m_cpuBodies[i]; b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i]; initSolverBody(i, &solverBody, &body); solverBody.m_originalBodyIndex = i; } m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); } } // int totalBodies = 0; int totalNumRows = 0; //b3RigidBody* rb0=0,*rb1=0; //if (1) { { // int i; m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints); // b3OpenCLArray<b3GpuGenericConstraint> gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue); if (useGpuInfo1) { B3_PROFILE("info1 and init batchConstraint"); m_gpuData->m_gpuConstraintInfo1->resize(numConstraints); if (1) { B3_PROFILE("getInfo1Kernel"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo1Kernel, "m_getInfo1Kernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); clFinish(m_gpuData->m_queue); } if (m_gpuData->m_batchSizes.size() == 0) { B3_PROFILE("initBatchConstraintsKernel"); m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints); unsigned int total = 0; m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1, *m_gpuData->m_gpuConstraintRowOffsets, numConstraints, &total); unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints - 1); totalNumRows = total + lastElem; { B3_PROFILE("init batch constraints"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initBatchConstraintsKernel, "m_initBatchConstraintsKernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); clFinish(m_gpuData->m_queue); } //assume the batching happens on CPU, so copy the data m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); } } else { totalNumRows = 0; gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); //calculate the total number of contraint rows for (int i = 0; i < numConstraints; i++) { unsigned int& info1 = m_tmpConstraintSizesPool[i]; // unsigned int info1; if (m_gpuData->m_cpuConstraints[i].isEnabled()) { m_gpuData->m_cpuConstraints[i].getInfo1(&info1, &m_gpuData->m_cpuBodies[0]); } else { info1 = 0; } totalNumRows += info1; } m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); } m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); m_gpuData->m_gpuConstraintRows->resize(totalNumRows); // b3GpuConstraintArray verify; if (useGpuInfo2) { { B3_PROFILE("getInfo2Kernel"); b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo2Kernel, "m_getInfo2Kernel"); launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); launcher.setBuffer(gpuConstraints->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); launcher.setBuffer(gpuBodies->getBufferCL()); launcher.setBuffer(gpuInertias->getBufferCL()); launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); launcher.setConst(infoGlobal.m_timeStep); launcher.setConst(infoGlobal.m_erp); launcher.setConst(infoGlobal.m_globalCfm); launcher.setConst(infoGlobal.m_damping); launcher.setConst(infoGlobal.m_numIterations); launcher.setConst(numConstraints); launcher.launch1D(numConstraints); clFinish(m_gpuData->m_queue); if (m_gpuData->m_batchSizes.size() == 0) m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); //m_gpuData->m_gpuConstraintRows->copyToHost(verify); //m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); } } else { gpuInertias->copyToHost(m_gpuData->m_cpuInertias); ///setup the b3SolverConstraints for (int i = 0; i < numConstraints; i++) { const int& info1 = m_tmpConstraintSizesPool[i]; if (info1) { int constraintIndex = batchConstraints[i].m_originalConstraintIndex; int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[constraintIndex]; b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset]; b3GpuGenericConstraint& constraint = m_gpuData->m_cpuConstraints[i]; b3RigidBodyData& rbA = m_gpuData->m_cpuBodies[constraint.getRigidBodyA()]; //b3RigidBody& rbA = constraint.getRigidBodyA(); // b3RigidBody& rbB = constraint.getRigidBodyB(); b3RigidBodyData& rbB = m_gpuData->m_cpuBodies[constraint.getRigidBodyB()]; int solverBodyIdA = constraint.getRigidBodyA(); //getOrInitSolverBody(constraint.getRigidBodyA(),bodies,inertias); int solverBodyIdB = constraint.getRigidBodyB(); //getOrInitSolverBody(constraint.getRigidBodyB(),bodies,inertias); b3GpuSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA]; b3GpuSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB]; if (rbA.m_invMass) { batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; } else { if (!solverBodyIdA) m_staticIdx = 0; batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; } if (rbB.m_invMass) { batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; } else { if (!solverBodyIdB) m_staticIdx = 0; batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; } int overrideNumSolverIterations = 0; //constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations) m_maxOverrideNumSolverIterations = overrideNumSolverIterations; int j; for (j = 0; j < info1; j++) { memset(¤tConstraintRow[j], 0, sizeof(b3GpuSolverConstraint)); currentConstraintRow[j].m_angularComponentA.setValue(0, 0, 0); currentConstraintRow[j].m_angularComponentB.setValue(0, 0, 0); currentConstraintRow[j].m_appliedImpulse = 0.f; currentConstraintRow[j].m_appliedPushImpulse = 0.f; currentConstraintRow[j].m_cfm = 0.f; currentConstraintRow[j].m_contactNormal.setValue(0, 0, 0); currentConstraintRow[j].m_friction = 0.f; currentConstraintRow[j].m_frictionIndex = 0; currentConstraintRow[j].m_jacDiagABInv = 0.f; currentConstraintRow[j].m_lowerLimit = 0.f; currentConstraintRow[j].m_upperLimit = 0.f; currentConstraintRow[j].m_originalContactPoint = 0; currentConstraintRow[j].m_overrideNumSolverIterations = 0; currentConstraintRow[j].m_relpos1CrossNormal.setValue(0, 0, 0); currentConstraintRow[j].m_relpos2CrossNormal.setValue(0, 0, 0); currentConstraintRow[j].m_rhs = 0.f; currentConstraintRow[j].m_rhsPenetration = 0.f; currentConstraintRow[j].m_solverBodyIdA = 0; currentConstraintRow[j].m_solverBodyIdB = 0; currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; currentConstraintRow[j].m_upperLimit = B3_INFINITY; currentConstraintRow[j].m_appliedImpulse = 0.f; currentConstraintRow[j].m_appliedPushImpulse = 0.f; currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; } bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); bodyAPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); b3GpuConstraintInfo2 info2; info2.fps = 1.f / infoGlobal.m_timeStep; info2.erp = infoGlobal.m_erp; info2.m_J1linearAxis = currentConstraintRow->m_contactNormal; info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal; info2.m_J2linearAxis = 0; info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal; info2.rowskip = sizeof(b3GpuSolverConstraint) / sizeof(b3Scalar); //check this ///the size of b3GpuSolverConstraint needs be a multiple of b3Scalar b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3GpuSolverConstraint)); info2.m_constraintError = ¤tConstraintRow->m_rhs; currentConstraintRow->m_cfm = infoGlobal.m_globalCfm; info2.m_damping = infoGlobal.m_damping; info2.cfm = ¤tConstraintRow->m_cfm; info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; info2.m_numIterations = infoGlobal.m_numIterations; m_gpuData->m_cpuConstraints[i].getInfo2(&info2, &m_gpuData->m_cpuBodies[0]); ///finalize the constraint setup for (j = 0; j < info1; j++) { b3GpuSolverConstraint& solverConstraint = currentConstraintRow[j]; if (solverConstraint.m_upperLimit >= m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) { solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); } if (solverConstraint.m_lowerLimit <= -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) { solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); } // solverConstraint.m_originalContactPoint = constraint; b3Matrix3x3& invInertiaWorldA = m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld; { //b3Vector3 angularFactorA(1,1,1); const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal; solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1; //*angularFactorA; } b3Matrix3x3& invInertiaWorldB = m_gpuData->m_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld; { const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2; //*constraint.getRigidBodyB().getAngularFactor(); } { //it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal //because it gets multiplied iMJlB b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass; b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal; b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass; //sign of normal? b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal; b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal); sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); sum += iMJlB.dot(solverConstraint.m_contactNormal); sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); b3Scalar fsum = b3Fabs(sum); b3Assert(fsum > B3_EPSILON); solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) / sum : 0.f; } ///fix rhs ///todo: add force/torque accelerators { b3Scalar rel_vel; b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel); b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel); rel_vel = vel1Dotn + vel2Dotn; b3Scalar restitution = 0.f; b3Scalar positionalError = solverConstraint.m_rhs; //already filled in by getConstraintInfo2 b3Scalar velocityError = restitution - rel_vel * info2.m_damping; b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv; b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv; solverConstraint.m_rhs = penetrationImpulse + velocityImpulse; solverConstraint.m_appliedImpulse = 0.f; } } } } m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); if (m_gpuData->m_batchSizes.size() == 0) m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); else m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); } //end useGpuInfo2 } #ifdef B3_SUPPORT_CONTACT_CONSTRAINTS { int i; for (i = 0; i < numManifolds; i++) { b3Contact4& manifold = manifoldPtr[i]; convertContact(bodies, inertias, &manifold, infoGlobal); } } #endif //B3_SUPPORT_CONTACT_CONSTRAINTS } // b3ContactSolverInfo info = infoGlobal; // int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); // int numConstraintPool = m_tmpSolverContactConstraintPool.size(); // int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); return 0.f; }
int main(int argc, char* argv[]) { b3SetCustomEnterProfileZoneFunc(b3ProfileManager::Start_Profile); b3SetCustomLeaveProfileZoneFunc(b3ProfileManager::Stop_Profile); b3SetCustomPrintfFunc(myprintf); b3Vector3 test=b3MakeVector3(1,2,3); test.x = 1; test.y = 4; b3Printf("main start"); b3CommandLineArgs args(argc,argv); if (args.CheckCmdLineFlag("help")) { Usage(); return 0; } args.GetCmdLineArgument("selected_demo",selectedDemo); bool benchmark=args.CheckCmdLineFlag("benchmark"); args.GetCmdLineArgument("max_framecount",maxFrameCount); dump_timings=args.CheckCmdLineFlag("dump_timings"); #ifndef B3_NO_PROFILE b3ProfileManager::Reset(); #endif //B3_NO_PROFILE window = new b3gDefaultOpenGLWindow(); b3gWindowConstructionInfo wci(g_OpenGLWidth,g_OpenGLHeight); window->createWindow(wci); window->setResizeCallback(MyResizeCallback); window->setMouseMoveCallback(MyMouseMoveCallback); window->setMouseButtonCallback(MyMouseButtonCallback); window->setKeyboardCallback(MyKeyboardCallback); window->setWindowTitle("Bullet 3.x GPU Rigid Body http://bulletphysics.org"); #ifndef __APPLE__ glewInit(); #endif gui = new GwenUserInterface(); b3Printf("started GwenUserInterface\n"); GLPrimitiveRenderer prim(g_OpenGLWidth,g_OpenGLHeight); stash = initFont(&prim); if (gui) { gui->init(g_OpenGLWidth,g_OpenGLHeight,stash,window->getRetinaScale()); b3Printf("init fonts\n"); gui->setToggleButtonCallback(MyButtonCallback); gui->registerToggleButton(MYPAUSE,"Pause"); gui->registerToggleButton(MYPROFILE,"Profile"); gui->registerToggleButton(MYRESET,"Reset"); int numItems = sizeof(allDemos)/sizeof(CpuDemo::CreateFunc*); demoNames.clear(); for (int i=0;i<numItems;i++) { CpuDemo* demo = allDemos[i](); demoNames.push_back(demo->getName()); delete demo; } gui->registerComboBox(MYCOMBOBOX1,numItems,&demoNames[0]); gui->setComboBoxCallback(MyComboBoxCallback); } do { bool syncOnly = false; gReset = false; static bool once=true; //glClearColor(0.3f, 0.3f, 0.3f, 1.0f); glClearColor(1,1,1,1); glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT); window->setWheelCallback(b3DefaultWheelCallback); { CpuDemo* demo = allDemos[selectedDemo](); sDemo = demo; // demo->myinit(); bool useGpu = false; int maxObjectCapacity=1024*1024;//128*1024; int maxShapeCapacityInBytes=10*1024*1024; //maxObjectCapacity = b3Max(maxObjectCapacity,ci.arraySizeX*ci.arraySizeX*ci.arraySizeX+10); CpuDemo::ConstructionInfo ci; ci.m_instancingRenderer = new GLInstancingRenderer(maxObjectCapacity,maxShapeCapacityInBytes); ci.m_window = window; ci.m_gui = gui; ci.m_instancingRenderer->init(); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->InitShaders(); ci.m_primRenderer = &prim; // render.init(); { demo->initPhysics(ci); } FILE* csvFile = 0; FILE* detailsFile = 0; if (benchmark) { gPause = false; char prefixFileName[1024]; char csvFileName[1024]; char detailsFileName[1024]; //todo: move this time stuff into the Platform/Window class #ifdef _WIN32 SYSTEMTIME time; GetLocalTime(&time); char buf[1024]; DWORD dwCompNameLen = 1024; if (0 != GetComputerName(buf, &dwCompNameLen)) { printf("%s", buf); } else { printf("unknown", buf); } sprintf(prefixFileName,"%s_%s_%s_date_%d-%d-%d_time_%d-%d-%d","CPU",buf,demoNames[selectedDemo],time.wDay,time.wMonth,time.wYear,time.wHour,time.wMinute,time.wSecond); #else timeval now; gettimeofday(&now,0); struct tm* ptm; ptm = localtime (&now.tv_sec); char buf[1024]; #ifdef __APPLE__ sprintf(buf,"MacOSX"); #else sprintf(buf,"Unix"); #endif sprintf(prefixFileName,"%s_%s_%s_%d_%d_%d_date_%d-%d-%d_time_%d-%d-%d",info.m_deviceName,buf,demoNames[selectedDemo],ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ, ptm->tm_mday, ptm->tm_mon+1, ptm->tm_year+1900, ptm->tm_hour, ptm->tm_min, ptm->tm_sec); #endif sprintf(csvFileName,"%s.csv",prefixFileName); sprintf(detailsFileName,"%s.txt",prefixFileName); printf("Open csv file %s and details file %s\n", csvFileName,detailsFileName); //GetSystemTime(&time2); csvFile=fopen(csvFileName,"w"); detailsFile = fopen(detailsFileName,"w"); if (detailsFile) defaultOutput = detailsFile; //if (f) // fprintf(f,"%s (%dx%dx%d=%d),\n", g_deviceName,ci.arraySizeX,ci.arraySizeY,ci.arraySizeZ,ci.arraySizeX*ci.arraySizeY*ci.arraySizeZ); } do { GLint err = glGetError(); assert(err==GL_NO_ERROR); if (exportFrame || exportMovie) { if (!renderTexture) { renderTexture = new GLRenderToTexture(); GLuint renderTextureId; glGenTextures(1, &renderTextureId); // "Bind" the newly created texture : all future texture functions will modify this texture glBindTexture(GL_TEXTURE_2D, renderTextureId); // Give an empty image to OpenGL ( the last "0" ) //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGB, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_UNSIGNED_BYTE, 0); //glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexImage2D(GL_TEXTURE_2D, 0,GL_RGBA32F, g_OpenGLWidth,g_OpenGLHeight, 0,GL_RGBA, GL_FLOAT, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); //glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); renderTexture->init(g_OpenGLWidth,g_OpenGLHeight,renderTextureId, RENDERTEXTURE_COLOR); } bool result = renderTexture->enable(); } err = glGetError(); assert(err==GL_NO_ERROR); b3ProfileManager::Reset(); b3ProfileManager::Increment_Frame_Counter(); // render.reshape(g_OpenGLWidth,g_OpenGLHeight); ci.m_instancingRenderer->resize(g_OpenGLWidth,g_OpenGLHeight); prim.setScreenSize(g_OpenGLWidth,g_OpenGLHeight); err = glGetError(); assert(err==GL_NO_ERROR); window->startRendering(); err = glGetError(); assert(err==GL_NO_ERROR); glClear(GL_COLOR_BUFFER_BIT| GL_DEPTH_BUFFER_BIT);//|GL_STENCIL_BUFFER_BIT); glEnable(GL_DEPTH_TEST); err = glGetError(); assert(err==GL_NO_ERROR); if (!gPause) { B3_PROFILE("clientMoveAndDisplay"); demo->clientMoveAndDisplay(); } else { } { B3_PROFILE("renderScene"); demo->renderScene(); } err = glGetError(); assert(err==GL_NO_ERROR); /*if (demo->getDynamicsWorld() && demo->getDynamicsWorld()->getNumCollisionObjects()) { B3_PROFILE("renderPhysicsWorld"); b3AlignedObjectArray<b3CollisionObject*> arr = demo->getDynamicsWorld()->getCollisionObjectArray(); b3CollisionObject** colObjArray = &arr[0]; render.renderPhysicsWorld(demo->getDynamicsWorld()->getNumCollisionObjects(),colObjArray, syncOnly); syncOnly = true; } */ if (exportFrame || exportMovie) { char fileName[1024]; sprintf(fileName,"screenShot%d.png",frameIndex++); writeTextureToPng(g_OpenGLWidth,g_OpenGLHeight,fileName); exportFrame = false; renderTexture->disable(); } { B3_PROFILE("gui->draw"); if (gui && gDrawGui) gui->draw(g_OpenGLWidth,g_OpenGLHeight); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("window->endRendering"); window->endRendering(); } err = glGetError(); assert(err==GL_NO_ERROR); { B3_PROFILE("glFinish"); } if (dump_timings) { b3ProfileManager::dumpAll(stdout); } if (csvFile) { static int frameCount=0; if (frameCount>0) { DumpSimulationTime(csvFile); if (detailsFile) { fprintf(detailsFile,"\n==================================\nFrame %d:\n", frameCount); b3ProfileManager::dumpAll(detailsFile); } } if (frameCount>=maxFrameCount) window->setRequestExit(); frameCount++; } if (gStep) gPause=true; } while (!window->requestedExit() && !gReset); demo->exitPhysics(); b3ProfileManager::CleanupMemory(); delete ci.m_instancingRenderer; delete demo; sDemo = 0; if (detailsFile) { fclose(detailsFile); detailsFile=0; } if (csvFile) { fclose(csvFile); csvFile=0; } } } while (gReset); if (gui) gui->setComboBoxCallback(0); { delete gui; gui=0; exitFont(); window->closeWindow(); delete window; window = 0; } return 0; }
///todo: add some acceleration structure (AABBs, tree etc) void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) { //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); B3_PROFILE("castRaysGPU"); { B3_PROFILE("raycast copyFromHost"); m_data->m_gpuRays->copyFromHost(rays); m_data->m_gpuHitResults->copyFromHost(hitResults); } int numRays = hitResults.size(); { m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); m_data->m_numRayRigidPairsPerRay->resize(numRays); m_data->m_gpuNumRayRigidPairs->resize(1); m_data->m_gpuRayRigidPairs->resize(numRays * 16); } //run kernel const bool USE_BRUTE_FORCE_RAYCAST = false; if(USE_BRUTE_FORCE_RAYCAST) { B3_PROFILE("raycast launch1D"); b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); int numRays = rays.size(); launcher.setConst(numRays); launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); launcher.setConst(numBodies); launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); launcher.launch1D(numRays); clFinish(m_data->m_q); } else { m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() ); m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); int numRayRigidPairs = -1; m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() ) { numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); } m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct //Sort ray-rigid pairs by ray index { B3_PROFILE("sort ray-rigid pairs"); m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) ); } //detect start,count of each ray pair { B3_PROFILE("detect ray-rigid pair index ranges"); { B3_PROFILE("reset ray-rigid pair index ranges"); m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); clFinish(m_data->m_q); } b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ), b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(numRayRigidPairs); launcher.launch1D(numRayRigidPairs); clFinish(m_data->m_q); } { B3_PROFILE("ray-rigid intersection"); b3BufferInfoCL bufferInfo[] = { b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ), b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ), b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ), b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(numRays); launcher.launch1D(numRays); clFinish(m_data->m_q); } } //copy results { B3_PROFILE("raycast copyToHost"); m_data->m_gpuHitResults->copyToHost(hitResults); } }
void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) { // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); B3_PROFILE("castRaysHost"); for (int r=0;r<rays.size();r++) { b3Vector3 rayFrom = rays[r].m_from; b3Vector3 rayTo = rays[r].m_to; float hitFraction = hitResults[r].m_hitFraction; int hitBodyIndex= -1; b3Vector3 hitNormal; for (int b=0;b<numBodies;b++) { const b3Vector3& pos = bodies[b].m_pos; const b3Quaternion& orn = bodies[b].m_quat; switch (collidables[bodies[b].m_collidableIdx].m_shapeType) { case SHAPE_SPHERE: { b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius; if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction)) { hitBodyIndex = b; b3Vector3 hitPoint; hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); hitNormal = (hitPoint-bodies[b].m_pos).normalize(); } } case SHAPE_CONVEX_HULL: { b3Transform convexWorldTransform; convexWorldTransform.setIdentity(); convexWorldTransform.setOrigin(bodies[b].m_pos); convexWorldTransform.setRotation(bodies[b].m_quat); b3Transform convexWorld2Local = convexWorldTransform.inverse(); b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); b3Vector3 rayToLocal = convexWorld2Local(rayTo); int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal)) { hitBodyIndex = b; } break; } default: { static bool once=true; if (once) { once=false; b3Warning("Raytest: unsupported shape type\n"); } } } } if (hitBodyIndex>=0) { hitResults[r].m_hitFraction = hitFraction; hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); hitResults[r].m_hitNormal = hitNormal; hitResults[r].m_hitBody = hitBodyIndex; } } }
bool checkData() { bool hasStatus = false; int serviceResult = enet_host_service(m_client, &m_event, 0); if (serviceResult > 0) { switch (m_event.type) { case ENET_EVENT_TYPE_CONNECT: printf("A new client connected from %x:%u.\n", m_event.peer->address.host, m_event.peer->address.port); m_event.peer->data = (void*)"New User"; break; case ENET_EVENT_TYPE_RECEIVE: { if (gVerboseNetworkMessagesClient) { printf("A packet of length %lu containing '%s' was " "received from %s on channel %u.\n", m_event.packet->dataLength, (char*)m_event.packet->data, (char*)m_event.peer->data, m_event.channelID); } int packetSizeInBytes = b3DeserializeInt(m_event.packet->data); if (packetSizeInBytes == m_event.packet->dataLength) { SharedMemoryStatus* statPtr = (SharedMemoryStatus*)&m_event.packet->data[4]; if (statPtr->m_type == CMD_STEP_FORWARD_SIMULATION_COMPLETED) { SharedMemoryStatus dummy; dummy.m_type = CMD_STEP_FORWARD_SIMULATION_COMPLETED; m_lastStatus = dummy; m_stream.resize(0); } else { m_lastStatus = *statPtr; int streamOffsetInBytes = 4 + sizeof(SharedMemoryStatus); int numStreamBytes = packetSizeInBytes - streamOffsetInBytes; m_stream.resize(numStreamBytes); for (int i = 0; i < numStreamBytes; i++) { m_stream[i] = m_event.packet->data[i + streamOffsetInBytes]; } } } else { printf("unknown status message received\n"); } enet_packet_destroy(m_event.packet); hasStatus = true; break; } case ENET_EVENT_TYPE_DISCONNECT: { printf("%s disconnected.\n", (char*)m_event.peer->data); break; } default: { printf("unknown event type: %d.\n", m_event.type); } } } else if (serviceResult > 0) { puts("Error with servicing the client"); } return hasStatus; }
void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) { B3_PROFILE("solveContacts"); m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies); m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies); m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts); if (optionalSortContactsDeterminism) { if (!gCpuSortContactsDeterminism) { B3_PROFILE("GPU Sort contact constraints (determinism)"); m_data->m_pBufContactOutGPUCopy->resize(numContacts); m_data->m_contactKeyValues->resize(numContacts); m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst(numContacts); launcher.launch1D( numContacts, 64 ); } m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); { B3_PROFILE("gpu reorderContactKernel (determinism)"); b3Int4 cdata; cdata.x = numContacts; //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); launcher.setConst( cdata ); launcher.launch1D( numContacts, 64 ); } } else { B3_PROFILE("CPU Sort contact constraints (determinism)"); b3AlignedObjectArray<b3Contact4> cpuConstraints; m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints); bool sort = true; if (sort) { cpuConstraints.quickSort(b3ContactCmp); for (int i=0;i<cpuConstraints.size();i++) { cpuConstraints[i].m_batchIdx = i; } } m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints); if (m_debugOutput==100) { for (int i=0;i<cpuConstraints.size();i++) { printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx); } } m_debugOutput++; } } int nContactOut = m_data->m_pBufContactOutGPU->size(); bool useSolver = true; if (useSolver) { float dt=1./60.; b3ConstraintCfg csCfg( dt ); csCfg.m_enableParallelSolve = true; csCfg.m_batchCellSize = 6; csCfg.m_staticIdx = static0Index; b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU; void* additionalData = 0;//m_data->m_frictionCGPU; const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU; b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU; int nContacts = nContactOut; int maxNumBatches = 0; if (!gUseLargeBatches) { if( m_data->m_solverGPU->m_contactBuffer2) { m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } if( m_data->m_solverGPU->m_contactBuffer2 == 0 ) { m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts ); m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); } //clFinish(m_data->m_queue); { B3_PROFILE("batching"); //@todo: just reserve it, without copy of original contact (unless we use warmstarting) const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf; { //b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf ); //b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn ); const int sortAlignment = 512; // todo. get this out of sort if( csCfg.m_enableParallelSolve ) { int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment ); b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; if (!gCpuSetSortData) { // 2. set cell idx B3_PROFILE("GPU set cell idx"); struct CB { int m_nContacts; int m_staticIdx; float m_scale; b3Int4 m_nSplit; }; b3Assert( sortSize%64 == 0 ); CB cdata; cdata.m_nContacts = nContacts; cdata.m_staticIdx = csCfg.m_staticIdx; cdata.m_scale = 1.f/csCfg.m_batchCellSize; cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X; cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y; cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z; m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata.m_nContacts ); launcher.setConst( cdata.m_scale ); launcher.setConst(cdata.m_nSplit); launcher.setConst(cdata.m_staticIdx); launcher.launch1D( sortSize, 64 ); } else { m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); b3AlignedObjectArray<b3SortData> sortDataCPU; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU); b3AlignedObjectArray<b3Contact4> contactCPU; m_data->m_pBufContactOutGPU->copyToHost(contactCPU); b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; bodyBuf->copyToHost(bodiesCPU); float scale = 1.f/csCfg.m_batchCellSize; b3Int4 nSplit; nSplit.x = B3_SOLVER_N_SPLIT_X; nSplit.y = B3_SOLVER_N_SPLIT_Y; nSplit.z = B3_SOLVER_N_SPLIT_Z; SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx); m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU); } if (!gCpuRadixSort) { // 3. sort by cell idx B3_PROFILE("gpuRadixSort"); //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; //int sortBit = 32; //if( n <= 0xffff ) sortBit = 16; //if( n <= 0xff ) sortBit = 8; //adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); //adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut); } else { b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); b3AlignedObjectArray<b3SortData> hostValues; keyValuesInOut.copyToHost(hostValues); hostValues.quickSort(sortfnc); keyValuesInOut.copyFromHost(hostValues); } if (gUseScanHost) { // 4. find entries B3_PROFILE("cpuBoundSearch"); b3AlignedObjectArray<unsigned int> countsHost; countsNative->copyToHost(countsHost); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); countsNative->copyFromHost(countsHost); //adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); //unsigned int sum; //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); b3AlignedObjectArray<unsigned int> offsetsHost; offsetsHost.resize(offsetsNative->size()); m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum ); offsetsNative->copyFromHost(offsetsHost); //printf("sum = %d\n",sum); } else { // 4. find entries B3_PROFILE("gpuBoundSearch"); m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); } if (nContacts) { // 5. sort constraints by cellIdx if (gReorderContactsOnCpu) { B3_PROFILE("cpu m_reorderContactKernel"); b3AlignedObjectArray<b3SortData> sortDataHost; m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); b3AlignedObjectArray<b3Contact4> inContacts; b3AlignedObjectArray<b3Contact4> outContacts; m_data->m_pBufContactOutGPU->copyToHost(inContacts); outContacts.resize(inContacts.size()); for (int i=0;i<nContacts;i++) { int srcIdx = sortDataHost[i].y; outContacts[i] = inContacts[srcIdx]; } m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts); /* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" "{\n" " int nContacts = cb.x;\n" " int gIdx = GET_GLOBAL_IDX;\n" " if( gIdx < nContacts )\n" " {\n" " int srcIdx = sortData[gIdx].y;\n" " out[gIdx] = in[srcIdx];\n" " }\n" "}\n" */ } else { B3_PROFILE("gpu m_reorderContactKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); } } } } //clFinish(m_data->m_queue); // { // b3AlignedObjectArray<unsigned int> histogram; // m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); // printf(",,,\n"); // } if (nContacts) { if (gUseCpuCopyConstraints) { for (int i=0;i<nContacts;i++) { m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); // m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); // m_data->m_pBufContactOutGPU->getBufferCL() } } else { B3_PROFILE("gpu m_copyConstraintKernel"); b3Int4 cdata; cdata.x = nContacts; b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) }; b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nContacts, 64 ); //we use the clFinish for proper benchmark/profile clFinish(m_data->m_queue); } } bool compareGPU = false; if (nContacts) { if (!gCpuBatchContacts) { B3_PROFILE("gpu batchContacts"); maxNumBatches = 150;//250; m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); clFinish(m_data->m_queue); } else { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); contactsIn->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; b3AlignedObjectArray<unsigned int> nNativeHost; b3AlignedObjectArray<unsigned int> offsetsNativeHost; { B3_PROFILE("countsNative/offsetsNative copyToHost"); countsNative->copyToHost(nNativeHost); offsetsNative->copyToHost(offsetsNativeHost); } int numNonzeroGrid=0; if (gUseLargeBatches) { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } else { m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES); B3_PROFILE("cpu batch grid"); for(int i=0; i<B3_SOLVER_N_CELLS; i++) { int n = (nNativeHost)[i]; int offset = (offsetsNativeHost)[i]; if( n ) { numNonzeroGrid++; int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } //we use the clFinish for proper benchmark/profile } } //clFinish(m_data->m_queue); } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } } } //printf("maxNumBatches = %d\n", maxNumBatches); if (gUseLargeBatches) { if (nContacts) { B3_PROFILE("cpu batchContacts"); static b3AlignedObjectArray<b3Contact4> cpuContacts; // b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; { B3_PROFILE("copyToHost"); m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); } b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; int numNonzeroGrid=0; { m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); int totalNumConstraints = cpuContacts.size(); int simdWidth =numBodies+1;//-1;//64;//-1;//32; int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU maxNumBatches = b3Max(numBatches,maxNumBatches); static int globalMaxBatch = 0; if (maxNumBatches>globalMaxBatch ) { globalMaxBatch = maxNumBatches; b3Printf("maxNumBatches = %d\n",maxNumBatches); } } { B3_PROFILE("m_contactBuffer->copyFromHost"); m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); } } } if (nContacts) { B3_PROFILE("gpu convertToConstraints"); m_data->m_solverGPU->convertToConstraints( bodyBuf, shapeBuf, m_data->m_solverGPU->m_contactBuffer2, contactConstraintOut, additionalData, nContacts, (b3SolverBase::ConstraintCfg&) csCfg ); clFinish(m_data->m_queue); } if (1) { int numIter = 4; m_data->m_solverGPU->m_nIterations = numIter;//10 if (!gCpuSolveConstraint) { B3_PROFILE("GPU solveContactConstraint"); /*m_data->m_solverGPU->solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches); */ //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); if (gUseLargeBatches) { solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes); } else { solveContactConstraint( m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut , maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu); } } else { B3_PROFILE("Host solveContactConstraint"); m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes); } } #if 0 if (0) { B3_PROFILE("read body velocities back to CPU"); //read body updated linear/angular velocities back to CPU m_data->m_bodyBufferGPU->read( m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL ); } #endif } }
inline int b3GpuPgsContactSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) { B3_PROFILE("sortConstraintByBatch2"); bodyUsed2.resize(2*simdWidth); for (int q=0;q<2*simdWidth;q++) bodyUsed2[q]=0; int curBodyUsed = 0; int numIter = 0; m_data->m_sortData.resize(numConstraints); m_data->m_idxBuffer.resize(numConstraints); m_data->m_old.resize(numConstraints); unsigned int* idxSrc = &m_data->m_idxBuffer[0]; #if defined(_DEBUG) for(int i=0; i<numConstraints; i++) cs[i].getBatchIdx() = -1; #endif for(int i=0; i<numConstraints; i++) idxSrc[i] = i; int numValidConstraints = 0; int unprocessedConstraintIndex = 0; int batchIdx = 0; { B3_PROFILE("cpu batch innerloop"); while( numValidConstraints < numConstraints) { numIter++; int nCurrentBatch = 0; // clear flag for(int i=0; i<curBodyUsed; i++) bodyUsed2[i] = 0; curBodyUsed = 0; for(int i=numValidConstraints; i<numConstraints; i++) { int idx = idxSrc[i]; b3Assert( idx < numConstraints ); // check if it can go int bodyAS = cs[idx].m_bodyAPtrAndSignBit; int bodyBS = cs[idx].m_bodyBPtrAndSignBit; int bodyA = abs(bodyAS); int bodyB = abs(bodyBS); bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; int aUnavailable = 0; int bUnavailable = 0; if (!aIsStatic) { for (int j=0;j<curBodyUsed;j++) { if (bodyA == bodyUsed2[j]) { aUnavailable=1; break; } } } if (!aUnavailable) if (!bIsStatic) { for (int j=0;j<curBodyUsed;j++) { if (bodyB == bodyUsed2[j]) { bUnavailable=1; break; } } } if( aUnavailable==0 && bUnavailable==0 ) // ok { if (!aIsStatic) { bodyUsed2[curBodyUsed++] = bodyA; } if (!bIsStatic) { bodyUsed2[curBodyUsed++] = bodyB; } cs[idx].getBatchIdx() = batchIdx; m_data->m_sortData[idx].m_key = batchIdx; m_data->m_sortData[idx].m_value = idx; if (i!=numValidConstraints) { b3Swap(idxSrc[i], idxSrc[numValidConstraints]); } numValidConstraints++; { nCurrentBatch++; if( nCurrentBatch == simdWidth ) { nCurrentBatch = 0; for(int i=0; i<curBodyUsed; i++) bodyUsed2[i] = 0; curBodyUsed = 0; } } } } batchIdx ++; } } { B3_PROFILE("quickSort"); //m_data->m_sortData.quickSort(sortfnc); } { B3_PROFILE("reorder"); // reorder memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints); for(int i=0; i<numConstraints; i++) { b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]); int idx = m_data->m_sortData[idxSrc[i]].m_value; cs[i] = m_data->m_old[idx]; } } #if defined(_DEBUG) // debugPrintf( "nBatches: %d\n", batchIdx ); for(int i=0; i<numConstraints; i++) { b3Assert( cs[i].getBatchIdx() != -1 ); } #endif return batchIdx; }
inline int b3GpuPgsContactSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies) { B3_PROFILE("sortConstraintByBatch"); int numIter = 0; sortData.resize(n); idxBuffer.resize(n); old.resize(n); unsigned int* idxSrc = &idxBuffer[0]; unsigned int* idxDst = &idxBuffer[0]; int nIdxSrc, nIdxDst; const int N_FLG = 256; const int FLG_MASK = N_FLG-1; unsigned int flg[N_FLG/32]; #if defined(_DEBUG) for(int i=0; i<n; i++) cs[i].getBatchIdx() = -1; #endif for(int i=0; i<n; i++) idxSrc[i] = i; nIdxSrc = n; int batchIdx = 0; { B3_PROFILE("cpu batch innerloop"); while( nIdxSrc ) { numIter++; nIdxDst = 0; int nCurrentBatch = 0; // clear flag for(int i=0; i<N_FLG/32; i++) flg[i] = 0; for(int i=0; i<nIdxSrc; i++) { int idx = idxSrc[i]; b3Assert( idx < n ); // check if it can go int bodyAS = cs[idx].m_bodyAPtrAndSignBit; int bodyBS = cs[idx].m_bodyBPtrAndSignBit; int bodyA = abs(bodyAS); int bodyB = abs(bodyBS); int aIdx = bodyA & FLG_MASK; int bIdx = bodyB & FLG_MASK; unsigned int aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31)); unsigned int bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31)); bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; //use inv_mass! aUnavailable = !aIsStatic? aUnavailable:0;// bUnavailable = !bIsStatic? bUnavailable:0; if( aUnavailable==0 && bUnavailable==0 ) // ok { if (!aIsStatic) flg[ aIdx/32 ] |= (1<<(aIdx&31)); if (!bIsStatic) flg[ bIdx/32 ] |= (1<<(bIdx&31)); cs[idx].getBatchIdx() = batchIdx; sortData[idx].m_key = batchIdx; sortData[idx].m_value = idx; { nCurrentBatch++; if( nCurrentBatch == simdWidth ) { nCurrentBatch = 0; for(int i=0; i<N_FLG/32; i++) flg[i] = 0; } } } else { idxDst[nIdxDst++] = idx; } } b3Swap( idxSrc, idxDst ); b3Swap( nIdxSrc, nIdxDst ); batchIdx ++; } } { B3_PROFILE("quickSort"); sortData.quickSort(sortfnc); } { B3_PROFILE("reorder"); // reorder memcpy( &old[0], cs, sizeof(b3Contact4)*n); for(int i=0; i<n; i++) { int idx = sortData[i].m_value; cs[i] = old[idx]; } } #if defined(_DEBUG) // debugPrintf( "nBatches: %d\n", batchIdx ); for(int i=0; i<n; i++) { b3Assert( cs[i].getBatchIdx() != -1 ); } #endif return batchIdx; }