void btConvexPolyhedron::project(const btTransform& trans, const btVector3& dir, btScalar& minProj, btScalar& maxProj, btVector3& witnesPtMin,btVector3& witnesPtMax) const { minProj = FLT_MAX; maxProj = -FLT_MAX; int numVerts = m_vertices.size(); for(int i=0;i<numVerts;i++) { btVector3 pt = trans * m_vertices[i]; btScalar dp = pt.dot(dir); if(dp < minProj) { minProj = dp; witnesPtMin = pt; } if(dp > maxProj) { maxProj = dp; witnesPtMax = pt; } } if(minProj>maxProj) { btSwap(minProj,maxProj); btSwap(witnesPtMin,witnesPtMax); } }
/// Builds a vertex pair whose members are stored in descending order:
/// m_v0 receives the larger of the two indices and m_v1 the smaller, so the
/// same two indices always produce the same pair regardless of argument order.
btInternalVertexPair(short int v0, short int v1)
	: m_v0(v1 > v0 ? v1 : v0),
	  m_v1(v1 > v0 ? v0 : v1)
{
}
/// Looks up the cached pair for two proxies.
///
/// The proxies are first put in ascending m_uniqueId order, then the pair of
/// UIDs is hashed and masked with (capacity - 1) of the pair array to pick a
/// bucket. The bucket's chain (linked through m_next) is walked until
/// equalsPair() matches.
///
/// @return Pointer to the stored pair, or NULL when the bucket index lies
///         outside m_hashTable or no entry in the chain matches.
btBroadphasePair* btHashedOverlappingPairCache::findPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1)
{
	gFindPairs++;

	// Canonical order: lower unique id first.
	if (proxy0->m_uniqueId > proxy1->m_uniqueId)
	{
		btSwap(proxy0, proxy1);
	}

	const int firstUid = proxy0->getUid();
	const int secondUid = proxy1->getUid();

	const int bucket = static_cast<int>(
		getHash(static_cast<unsigned int>(firstUid), static_cast<unsigned int>(secondUid)) &
		(m_overlappingPairArray.capacity() - 1));

	if (bucket >= m_hashTable.size())
	{
		return NULL;
	}

	// Follow the collision chain for this bucket.
	int cursor = m_hashTable[bucket];
	for (;;)
	{
		if (cursor == BT_NULL_PAIR)
		{
			return NULL;
		}
		if (equalsPair(m_overlappingPairArray[cursor], firstUid, secondUid))
		{
			break;
		}
		cursor = m_next[cursor];
	}

	btAssert(cursor < m_overlappingPairArray.size());
	return &m_overlappingPairArray[cursor];
}
/// Conditionally exchanges the internal node @p n with its parent.
///
/// When the parent's address compares greater than @p n's address, the two
/// nodes trade places in the tree: @p n takes over the parent's position
/// (updating the grandparent's child slot, or the root reference @p r when
/// there is no grandparent), the parent becomes a child of @p n in the slot
/// @p n used to occupy, the former sibling is re-parented to @p n, and the two
/// nodes' volumes are swapped.
///
/// @param n  Internal node to examine (asserted to be internal).
/// @param r  Root pointer, overwritten with @p n when the parent was the root.
/// @return   The former parent when an exchange happened, otherwise @p n.
static DBVT_INLINE btDbvtNode* sort(btDbvtNode* n, btDbvtNode*& r)
{
	btDbvtNode* parent = n->parent;
	btAssert(n->isinternal());

	if (!(parent > n))
	{
		return (n);
	}

	const int slot = indexof(n);
	const int otherSlot = 1 - slot;
	btDbvtNode* sibling = parent->childs[otherSlot];
	btDbvtNode* grandparent = parent->parent;
	btAssert(n == parent->childs[slot]);

	// Hook n into the place the parent occupied.
	if (grandparent)
	{
		grandparent->childs[indexof(parent)] = n;
	}
	else
	{
		r = n;
	}

	sibling->parent = n;
	parent->parent = n;
	n->parent = grandparent;

	// The parent inherits n's former children...
	parent->childs[0] = n->childs[0];
	parent->childs[1] = n->childs[1];
	n->childs[0]->parent = parent;
	n->childs[1]->parent = parent;

	// ...and n adopts the parent plus the former sibling.
	n->childs[slot] = parent;
	n->childs[otherSlot] = sibling;

	btSwap(parent->volume, n->volume);
	return (parent);
}
/// Picks four distinct vertex indices forming a starting simplex.
///
/// Each index is chosen with maxdirsterid() along a search axis:
///  - p0 / p1: extremes along a fixed, slightly skewed direction and its negation;
///  - p2: extreme along a direction derived from the p0-p1 edge by a cross
///        product (the longer of two candidates), retried with the negated
///        direction if it coincides with p0 or p1;
///  - p3: extreme along the normal of the (p2-p0, p0-p1) plane, again retried
///        with the negated direction on a duplicate.
/// The last two indices are ordered so that the scalar triple product
/// (v3-v0) . ((v1-v0) x (v2-v0)) is not negative.
///
/// @param verts        Vertex array searched.
/// @param verts_count  Number of vertices in @p verts.
/// @param allow        Per-vertex flags forwarded to maxdirsterid().
/// @return int4(p0,p1,p2,p3), or int4(-1,-1,-1,-1) when four distinct
///         vertices cannot be found this way.
int4 HullLibrary::FindSimplex(btVector3* verts, int verts_count, btAlignedObjectArray<int>& allow)
{
	btVector3 axis0(btScalar(0.01), btScalar(0.02), btScalar(1.0));
	int p0 = maxdirsterid(verts, verts_count, axis0, allow);
	int p1 = maxdirsterid(verts, verts_count, -axis0, allow);
	axis0 = verts[p0] - verts[p1];
	if (p0 == p1 || axis0 == btVector3(0, 0, 0))
	{
		return int4(-1, -1, -1, -1);
	}

	// Two candidate directions perpendicular to the first edge; use the longer one.
	btVector3 axis1 = btCross(btVector3(btScalar(1), btScalar(0.02), btScalar(0)), axis0);
	btVector3 axis2 = btCross(btVector3(btScalar(-0.02), btScalar(1), btScalar(0)), axis0);
	if (!(axis1.length() > axis2.length()))
	{
		axis1 = axis2;
	}
	axis1.normalize();

	int p2 = maxdirsterid(verts, verts_count, axis1, allow);
	if (p2 == p0 || p2 == p1)
	{
		p2 = maxdirsterid(verts, verts_count, -axis1, allow);
	}
	if (p2 == p0 || p2 == p1)
	{
		return int4(-1, -1, -1, -1);
	}

	axis1 = verts[p2] - verts[p0];
	axis2 = btCross(axis1, axis0).normalized();

	int p3 = maxdirsterid(verts, verts_count, axis2, allow);
	if (p3 == p0 || p3 == p1 || p3 == p2)
	{
		p3 = maxdirsterid(verts, verts_count, -axis2, allow);
	}
	if (p3 == p0 || p3 == p1 || p3 == p2)
	{
		return int4(-1, -1, -1, -1);
	}

	btAssert(!(p0 == p1 || p0 == p2 || p0 == p3 || p1 == p2 || p1 == p3 || p2 == p3));

	// Fix the winding: exchange the last two indices when the triple product is negative.
	const btScalar orientation = btDot(verts[p3] - verts[p0], btCross(verts[p1] - verts[p0], verts[p2] - verts[p0]));
	if (orientation < 0)
	{
		return int4(p0, p1, p3, p2);
	}
	return int4(p0, p1, p2, p3);
}
/// Returns the cached pair for two proxies, creating and registering it when
/// it is not present yet.
///
/// Steps performed for a new pair, in this order:
///  1. raw storage is taken from the pair array (expandNonInitializing);
///  2. the ghost pair callback, if any, is notified;
///  3. if the array's capacity grew, growTables() is called and the bucket is
///     recomputed with the new capacity mask;
///  4. the pair is constructed in place and pushed to the front of its
///     bucket's chain.
///
/// @return Pointer to the existing or newly created pair.
btBroadphasePair* btHashedOverlappingPairCache::internalAddPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1)
{
	// Canonical order: lower unique id first.
	if (proxy0->m_uniqueId > proxy1->m_uniqueId)
	{
		btSwap(proxy0, proxy1);
	}
	const int firstUid = proxy0->getUid();
	const int secondUid = proxy1->getUid();

	int bucket = static_cast<int>(
		getHash(static_cast<unsigned int>(firstUid), static_cast<unsigned int>(secondUid)) &
		(m_overlappingPairArray.capacity() - 1));

	btBroadphasePair* existing = internalFindPair(proxy0, proxy1, bucket);
	if (existing != NULL)
	{
		return existing;
	}

	const int insertIndex = m_overlappingPairArray.size();
	const int capacityBefore = m_overlappingPairArray.capacity();
	void* storage = &m_overlappingPairArray.expandNonInitializing();

	// This is where an actual pair is added, so the 'ghost' is told as well.
	if (m_ghostPairCallback)
	{
		m_ghostPairCallback->addOverlappingPair(proxy0, proxy1);
	}

	const int capacityAfter = m_overlappingPairArray.capacity();
	if (capacityBefore < capacityAfter)
	{
		growTables();
		// The mask changed with the capacity, so the bucket must be recomputed.
		bucket = static_cast<int>(
			getHash(static_cast<unsigned int>(firstUid), static_cast<unsigned int>(secondUid)) &
			(m_overlappingPairArray.capacity() - 1));
	}

	btBroadphasePair* created = new (storage) btBroadphasePair(*proxy0, *proxy1);
	created->m_algorithm = 0;
	created->m_internalTmpValue = 0;

	// Link the new entry at the head of its bucket.
	m_next[insertIndex] = m_hashTable[bucket];
	m_hashTable[bucket] = insertIndex;

	return created;
}
void btConvexHullShape::project(const btTransform& trans, const btVector3& dir, btScalar& minProj, btScalar& maxProj, btVector3& witnesPtMin,btVector3& witnesPtMax) const { #if 1 minProj = FLT_MAX; maxProj = -FLT_MAX; int numVerts = m_unscaledPoints.size(); for(int i=0;i<numVerts;i++) { btVector3 vtx = m_unscaledPoints[i] * m_localScaling; btVector3 pt = trans * vtx; btScalar dp = pt.dot(dir); if(dp < minProj) { minProj = dp; witnesPtMin = pt; } if(dp > maxProj) { maxProj = dp; witnesPtMax=pt; } } #else btVector3 localAxis = dir*trans.getBasis(); witnesPtMin = trans(localGetSupportingVertex(localAxis)); witnesPtMax = trans(localGetSupportingVertex(-localAxis)); minProj = witnesPtMin.dot(dir); maxProj = witnesPtMax.dot(dir); #endif if(minProj>maxProj) { btSwap(minProj,maxProj); btSwap(witnesPtMin,witnesPtMax); } }
void Process(const btDbvtNode* na, const btDbvtNode* nb) { if (na != nb) { btDbvtProxy* pa = (btDbvtProxy*)na->data; btDbvtProxy* pb = (btDbvtProxy*)nb->data; #if DBVT_BP_SORTPAIRS if (pa->m_uniqueId > pb->m_uniqueId) btSwap(pa, pb); #endif pbp->m_paircache->addOverlappingPair(pa, pb); ++pbp->m_newpairs; } }
void btPolyhedralContactClipping::clipFaceAgainstHull(const btVector3& separatingNormal, const btConvexPolyhedron& hullA, const btTransform& transA, btVertexArray& worldVertsB1, const btScalar minDist, btScalar maxDist,btDiscreteCollisionDetectorInterface::Result& resultOut) { btVertexArray worldVertsB2; btVertexArray* pVtxIn = &worldVertsB1; btVertexArray* pVtxOut = &worldVertsB2; pVtxOut->reserve(pVtxIn->size()); int closestFaceA=-1; { btScalar dmin = FLT_MAX; for(int face=0;face<hullA.m_faces.size();face++) { const btVector3 Normal(hullA.m_faces[face].m_plane[0], hullA.m_faces[face].m_plane[1], hullA.m_faces[face].m_plane[2]); const btVector3 faceANormalWS = transA.getBasis() * Normal; btScalar d = faceANormalWS.dot(separatingNormal); if (d < dmin) { dmin = d; closestFaceA = face; } } } if (closestFaceA<0) return; const btFace& polyA = hullA.m_faces[closestFaceA]; // clip polygon to back of planes of all faces of hull A that are adjacent to witness face int numContacts = pVtxIn->size(); int numVerticesA = polyA.m_indices.size(); for(int e0=0;e0<numVerticesA;e0++) { const btVector3& a = hullA.m_vertices[polyA.m_indices[e0]]; const btVector3& b = hullA.m_vertices[polyA.m_indices[(e0+1)%numVerticesA]]; const btVector3 edge0 = a - b; const btVector3 WorldEdge0 = transA.getBasis() * edge0; btVector3 worldPlaneAnormal1 = transA.getBasis()* btVector3(polyA.m_plane[0],polyA.m_plane[1],polyA.m_plane[2]); btVector3 planeNormalWS1 = -WorldEdge0.cross(worldPlaneAnormal1);//.cross(WorldEdge0); btVector3 worldA1 = transA*a; btScalar planeEqWS1 = -worldA1.dot(planeNormalWS1); //int otherFace=0; #ifdef BLA1 int otherFace = polyA.m_connectedFaces[e0]; btVector3 localPlaneNormal (hullA.m_faces[otherFace].m_plane[0],hullA.m_faces[otherFace].m_plane[1],hullA.m_faces[otherFace].m_plane[2]); btScalar localPlaneEq = hullA.m_faces[otherFace].m_plane[3]; btVector3 planeNormalWS = transA.getBasis()*localPlaneNormal; btScalar planeEqWS=localPlaneEq-planeNormalWS.dot(transA.getOrigin()); 
#else btVector3 planeNormalWS = planeNormalWS1; btScalar planeEqWS=planeEqWS1; #endif //clip face clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); btSwap(pVtxIn,pVtxOut); pVtxOut->resize(0); } //#define ONLY_REPORT_DEEPEST_POINT btVector3 point; // only keep points that are behind the witness face { btVector3 localPlaneNormal (polyA.m_plane[0],polyA.m_plane[1],polyA.m_plane[2]); btScalar localPlaneEq = polyA.m_plane[3]; btVector3 planeNormalWS = transA.getBasis()*localPlaneNormal; btScalar planeEqWS=localPlaneEq-planeNormalWS.dot(transA.getOrigin()); for (int i=0;i<pVtxIn->size();i++) { btScalar depth = planeNormalWS.dot(pVtxIn->at(i))+planeEqWS; if (depth <=minDist) { // printf("clamped: depth=%f to minDist=%f\n",depth,minDist); depth = minDist; } if (depth <=maxDist) { btVector3 point = pVtxIn->at(i); #ifdef ONLY_REPORT_DEEPEST_POINT curMaxDist = depth; #else #if 0 if (depth<-3) { printf("error in btPolyhedralContactClipping depth = %f\n", depth); printf("likely wrong separatingNormal passed in\n"); } #endif resultOut.addContactPoint(separatingNormal,point,depth); #endif } } } #ifdef ONLY_REPORT_DEEPEST_POINT if (curMaxDist<maxDist) { resultOut.addContactPoint(separatingNormal,point,curMaxDist); } #endif //ONLY_REPORT_DEEPEST_POINT }
void btDbvtBroadphase::collide(btDispatcher* dispatcher) { /*printf("---------------------------------------------------------\n"); printf("m_sets[0].m_leaves=%d\n",m_sets[0].m_leaves); printf("m_sets[1].m_leaves=%d\n",m_sets[1].m_leaves); printf("numPairs = %d\n",getOverlappingPairCache()->getNumOverlappingPairs()); { int i; for (i=0;i<getOverlappingPairCache()->getNumOverlappingPairs();i++) { printf("pair[%d]=(%d,%d),",i,getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy0->getUid(), getOverlappingPairCache()->getOverlappingPairArray()[i].m_pProxy1->getUid()); } printf("\n"); } */ SPC(m_profiling.m_total); /* optimize */ m_sets[0].optimizeIncremental(1 + (m_sets[0].m_leaves * m_dupdates) / 100); if (m_fixedleft) { const int count = 1 + (m_sets[1].m_leaves * m_fupdates) / 100; m_sets[1].optimizeIncremental(1 + (m_sets[1].m_leaves * m_fupdates) / 100); m_fixedleft = btMax<int>(0, m_fixedleft - count); } /* dynamic -> fixed set */ m_stageCurrent = (m_stageCurrent + 1) % STAGECOUNT; btDbvtProxy* current = m_stageRoots[m_stageCurrent]; if (current) { #if DBVT_BP_ACCURATESLEEPING btDbvtTreeCollider collider(this); #endif do { btDbvtProxy* next = current->links[1]; listremove(current, m_stageRoots[current->stage]); listappend(current, m_stageRoots[STAGECOUNT]); #if DBVT_BP_ACCURATESLEEPING m_paircache->removeOverlappingPairsContainingProxy(current, dispatcher); collider.proxy = current; btDbvt::collideTV(m_sets[0].m_root, current->aabb, collider); btDbvt::collideTV(m_sets[1].m_root, current->aabb, collider); #endif m_sets[0].remove(current->leaf); ATTRIBUTE_ALIGNED16(btDbvtVolume) curAabb = btDbvtVolume::FromMM(current->m_aabbMin, current->m_aabbMax); current->leaf = m_sets[1].insert(curAabb, current); current->stage = STAGECOUNT; current = next; } while (current); m_fixedleft = m_sets[1].m_leaves; m_needcleanup = true; } /* collide dynamics */ { btDbvtTreeCollider collider(this); if (m_deferedcollide) { SPC(m_profiling.m_fdcollide); 
m_sets[0].collideTTpersistentStack(m_sets[0].m_root, m_sets[1].m_root, collider); } if (m_deferedcollide) { SPC(m_profiling.m_ddcollide); m_sets[0].collideTTpersistentStack(m_sets[0].m_root, m_sets[0].m_root, collider); } } /* clean up */ if (m_needcleanup) { SPC(m_profiling.m_cleanup); btBroadphasePairArray& pairs = m_paircache->getOverlappingPairArray(); if (pairs.size() > 0) { int ni = btMin(pairs.size(), btMax<int>(m_newpairs, (pairs.size() * m_cupdates) / 100)); for (int i = 0; i < ni; ++i) { btBroadphasePair& p = pairs[(m_cid + i) % pairs.size()]; btDbvtProxy* pa = (btDbvtProxy*)p.m_pProxy0; btDbvtProxy* pb = (btDbvtProxy*)p.m_pProxy1; if (!Intersect(pa->leaf->volume, pb->leaf->volume)) { #if DBVT_BP_SORTPAIRS if (pa->m_uniqueId > pb->m_uniqueId) btSwap(pa, pb); #endif m_paircache->removeOverlappingPair(pa, pb, dispatcher); --ni; --i; } } if (pairs.size() > 0) m_cid = (m_cid + ni) % pairs.size(); else m_cid = 0; } } ++m_pid; m_newpairs = 1; m_needcleanup = false; if (m_updates_call > 0) { m_updates_ratio = m_updates_done / (btScalar)m_updates_call; } else { m_updates_ratio = 0; } m_updates_done /= 2; m_updates_call /= 2; }
void TinyRendererVisualShapeConverter::render(const float viewMat[16], const float projMat[16]) { //clear the color buffer TGAColor clearColor; clearColor.bgra[0] = 255; clearColor.bgra[1] = 255; clearColor.bgra[2] = 255; clearColor.bgra[3] = 255; clearBuffers(clearColor); ATTRIBUTE_ALIGNED16(btScalar modelMat[16]); btVector3 lightDirWorld(-5,200,-40); switch (m_data->m_upAxis) { case 1: lightDirWorld = btVector3(-50.f,100,30); break; case 2: lightDirWorld = btVector3(-50.f,30,100); break; default:{} }; lightDirWorld.normalize(); // printf("num m_swRenderInstances = %d\n", m_data->m_swRenderInstances.size()); for (int i=0;i<m_data->m_swRenderInstances.size();i++) { TinyRendererObjectArray** visualArrayPtr = m_data->m_swRenderInstances.getAtIndex(i); if (0==visualArrayPtr) continue;//can this ever happen? TinyRendererObjectArray* visualArray = *visualArrayPtr; btHashPtr colObjHash = m_data->m_swRenderInstances.getKeyAtIndex(i); const btCollisionObject* colObj = (btCollisionObject*) colObjHash.getPointer(); for (int v=0;v<visualArray->m_renderObjects.size();v++) { TinyRenderObjectData* renderObj = visualArray->m_renderObjects[v]; //sync the object transform const btTransform& tr = colObj->getWorldTransform(); tr.getOpenGLMatrix(modelMat); for (int i=0;i<4;i++) { for (int j=0;j<4;j++) { renderObj->m_projectionMatrix[i][j] = projMat[i+4*j]; renderObj->m_modelMatrix[i][j] = modelMat[i+4*j]; renderObj->m_viewMatrix[i][j] = viewMat[i+4*j]; renderObj->m_localScaling = colObj->getCollisionShape()->getLocalScaling(); renderObj->m_lightDirWorld = lightDirWorld; } } TinyRenderer::renderObject(*renderObj); } } //printf("write tga \n"); //m_data->m_rgbColorBuffer.write_tga_file("camera.tga"); // printf("flipped!\n"); m_data->m_rgbColorBuffer.flip_vertically(); //flip z-buffer { int half = m_data->m_swHeight>>1; for (int j=0; j<half; j++) { unsigned long l1 = j*m_data->m_swWidth; unsigned long l2 = (m_data->m_swHeight-1-j)*m_data->m_swWidth; for (int i=0;i<m_data->m_swWidth;i++) 
{ btSwap(m_data->m_depthBuffer[l1+i],m_data->m_depthBuffer[l2+i]); } } } }
void* btHashedOverlappingPairCache::removeOverlappingPair(btBroadphaseProxy* proxy0, btBroadphaseProxy* proxy1,btDispatcher* dispatcher) { gRemovePairs++; if(proxy0->m_uniqueId>proxy1->m_uniqueId) btSwap(proxy0,proxy1); int proxyId1 = proxy0->getUid(); int proxyId2 = proxy1->getUid(); /*if (proxyId1 > proxyId2) btSwap(proxyId1, proxyId2);*/ int hash = static_cast<int>(getHash(static_cast<unsigned int>(proxyId1),static_cast<unsigned int>(proxyId2)) & (m_overlappingPairArray.capacity()-1)); btBroadphasePair* pair = internalFindPair(proxy0, proxy1, hash); if (pair == NULL) { return 0; } cleanOverlappingPair(*pair,dispatcher); void* userData = pair->m_internalInfo1; btAssert(pair->m_pProxy0->getUid() == proxyId1); btAssert(pair->m_pProxy1->getUid() == proxyId2); int pairIndex = int(pair - &m_overlappingPairArray[0]); btAssert(pairIndex < m_overlappingPairArray.size()); // Remove the pair from the hash table. int index = m_hashTable[hash]; btAssert(index != BT_NULL_PAIR); int previous = BT_NULL_PAIR; while (index != pairIndex) { previous = index; index = m_next[index]; } if (previous != BT_NULL_PAIR) { btAssert(m_next[previous] == pairIndex); m_next[previous] = m_next[pairIndex]; } else { m_hashTable[hash] = m_next[pairIndex]; } // We now move the last pair into spot of the // pair being removed. We need to fix the hash // table indices to support the move. int lastPairIndex = m_overlappingPairArray.size() - 1; if (m_ghostPairCallback) m_ghostPairCallback->removeOverlappingPair(proxy0, proxy1,dispatcher); // If the removed pair is the last pair, we are done. if (lastPairIndex == pairIndex) { m_overlappingPairArray.pop_back(); return userData; } // Remove the last pair from the hash table. const btBroadphasePair* last = &m_overlappingPairArray[lastPairIndex]; /* missing swap here too, Nat. 
*/ int lastHash = static_cast<int>(getHash(static_cast<unsigned int>(last->m_pProxy0->getUid()), static_cast<unsigned int>(last->m_pProxy1->getUid())) & (m_overlappingPairArray.capacity()-1)); index = m_hashTable[lastHash]; btAssert(index != BT_NULL_PAIR); previous = BT_NULL_PAIR; while (index != lastPairIndex) { previous = index; index = m_next[index]; } if (previous != BT_NULL_PAIR) { btAssert(m_next[previous] == lastPairIndex); m_next[previous] = m_next[lastPairIndex]; } else { m_hashTable[lastHash] = m_next[lastPairIndex]; } // Copy the last pair into the remove pair's spot. m_overlappingPairArray[pairIndex] = m_overlappingPairArray[lastPairIndex]; // Insert the last pair into the hash table m_next[pairIndex] = m_hashTable[lastHash]; m_hashTable[lastHash] = pairIndex; m_overlappingPairArray.pop_back(); return userData; }
void btDbvt::benchmark() { static const btScalar cfgVolumeCenterScale = 100; static const btScalar cfgVolumeExentsBase = 1; static const btScalar cfgVolumeExentsScale = 4; static const int cfgLeaves = 8192; static const bool cfgEnable = true; //[1] btDbvtVolume intersections bool cfgBenchmark1_Enable = cfgEnable; static const int cfgBenchmark1_Iterations = 8; static const int cfgBenchmark1_Reference = 3499; //[2] btDbvtVolume merges bool cfgBenchmark2_Enable = cfgEnable; static const int cfgBenchmark2_Iterations = 4; static const int cfgBenchmark2_Reference = 1945; //[3] btDbvt::collideTT bool cfgBenchmark3_Enable = cfgEnable; static const int cfgBenchmark3_Iterations = 512; static const int cfgBenchmark3_Reference = 5485; //[4] btDbvt::collideTT self bool cfgBenchmark4_Enable = cfgEnable; static const int cfgBenchmark4_Iterations = 512; static const int cfgBenchmark4_Reference = 2814; //[5] btDbvt::collideTT xform bool cfgBenchmark5_Enable = cfgEnable; static const int cfgBenchmark5_Iterations = 512; static const btScalar cfgBenchmark5_OffsetScale = 2; static const int cfgBenchmark5_Reference = 7379; //[6] btDbvt::collideTT xform,self bool cfgBenchmark6_Enable = cfgEnable; static const int cfgBenchmark6_Iterations = 512; static const btScalar cfgBenchmark6_OffsetScale = 2; static const int cfgBenchmark6_Reference = 7270; //[7] btDbvt::rayTest bool cfgBenchmark7_Enable = cfgEnable; static const int cfgBenchmark7_Passes = 32; static const int cfgBenchmark7_Iterations = 65536; static const int cfgBenchmark7_Reference = 6307; //[8] insert/remove bool cfgBenchmark8_Enable = cfgEnable; static const int cfgBenchmark8_Passes = 32; static const int cfgBenchmark8_Iterations = 65536; static const int cfgBenchmark8_Reference = 2105; //[9] updates (teleport) bool cfgBenchmark9_Enable = cfgEnable; static const int cfgBenchmark9_Passes = 32; static const int cfgBenchmark9_Iterations = 65536; static const int cfgBenchmark9_Reference = 1879; //[10] updates (jitter) bool 
cfgBenchmark10_Enable = cfgEnable; static const btScalar cfgBenchmark10_Scale = cfgVolumeCenterScale/10000; static const int cfgBenchmark10_Passes = 32; static const int cfgBenchmark10_Iterations = 65536; static const int cfgBenchmark10_Reference = 1244; //[11] optimize (incremental) bool cfgBenchmark11_Enable = cfgEnable; static const int cfgBenchmark11_Passes = 64; static const int cfgBenchmark11_Iterations = 65536; static const int cfgBenchmark11_Reference = 2510; //[12] btDbvtVolume notequal bool cfgBenchmark12_Enable = cfgEnable; static const int cfgBenchmark12_Iterations = 32; static const int cfgBenchmark12_Reference = 3677; //[13] culling(OCL+fullsort) bool cfgBenchmark13_Enable = cfgEnable; static const int cfgBenchmark13_Iterations = 1024; static const int cfgBenchmark13_Reference = 2231; //[14] culling(OCL+qsort) bool cfgBenchmark14_Enable = cfgEnable; static const int cfgBenchmark14_Iterations = 8192; static const int cfgBenchmark14_Reference = 3500; //[15] culling(KDOP+qsort) bool cfgBenchmark15_Enable = cfgEnable; static const int cfgBenchmark15_Iterations = 8192; static const int cfgBenchmark15_Reference = 1151; //[16] insert/remove batch bool cfgBenchmark16_Enable = cfgEnable; static const int cfgBenchmark16_BatchCount = 256; static const int cfgBenchmark16_Passes = 16384; static const int cfgBenchmark16_Reference = 5138; //[17] select bool cfgBenchmark17_Enable = cfgEnable; static const int cfgBenchmark17_Iterations = 4; static const int cfgBenchmark17_Reference = 3390; btClock wallclock; printf("Benchmarking dbvt...\r\n"); printf("\tWorld scale: %f\r\n",cfgVolumeCenterScale); printf("\tExtents base: %f\r\n",cfgVolumeExentsBase); printf("\tExtents range: %f\r\n",cfgVolumeExentsScale); printf("\tLeaves: %u\r\n",cfgLeaves); printf("\tsizeof(btDbvtVolume): %u bytes\r\n",sizeof(btDbvtVolume)); printf("\tsizeof(btDbvtNode): %u bytes\r\n",sizeof(btDbvtNode)); if(cfgBenchmark1_Enable) {// Benchmark 1 srand(380843); btAlignedObjectArray<btDbvtVolume> 
volumes; btAlignedObjectArray<bool> results; volumes.resize(cfgLeaves); results.resize(cfgLeaves); for(int i=0;i<cfgLeaves;++i) { volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale); } printf("[1] btDbvtVolume intersections: "); wallclock.reset(); for(int i=0;i<cfgBenchmark1_Iterations;++i) { for(int j=0;j<cfgLeaves;++j) { for(int k=0;k<cfgLeaves;++k) { results[k]=Intersect(volumes[j],volumes[k]); } } } const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark1_Reference)*100/time); } if(cfgBenchmark2_Enable) {// Benchmark 2 srand(380843); btAlignedObjectArray<btDbvtVolume> volumes; btAlignedObjectArray<btDbvtVolume> results; volumes.resize(cfgLeaves); results.resize(cfgLeaves); for(int i=0;i<cfgLeaves;++i) { volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale); } printf("[2] btDbvtVolume merges: "); wallclock.reset(); for(int i=0;i<cfgBenchmark2_Iterations;++i) { for(int j=0;j<cfgLeaves;++j) { for(int k=0;k<cfgLeaves;++k) { Merge(volumes[j],volumes[k],results[k]); } } } const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark2_Reference)*100/time); } if(cfgBenchmark3_Enable) {// Benchmark 3 srand(380843); btDbvt dbvt[2]; btDbvtBenchmark::NilPolicy policy; btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[0]); btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[1]); dbvt[0].optimizeTopDown(); dbvt[1].optimizeTopDown(); printf("[3] btDbvt::collideTT: "); wallclock.reset(); for(int i=0;i<cfgBenchmark3_Iterations;++i) { btDbvt::collideTT(dbvt[0].m_root,dbvt[1].m_root,policy); } const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark3_Reference)*100/time); } if(cfgBenchmark4_Enable) {// Benchmark 4 srand(380843); btDbvt dbvt; 
btDbvtBenchmark::NilPolicy policy; btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); printf("[4] btDbvt::collideTT self: "); wallclock.reset(); for(int i=0;i<cfgBenchmark4_Iterations;++i) { btDbvt::collideTT(dbvt.m_root,dbvt.m_root,policy); } const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark4_Reference)*100/time); } if(cfgBenchmark5_Enable) {// Benchmark 5 srand(380843); btDbvt dbvt[2]; btAlignedObjectArray<btTransform> transforms; btDbvtBenchmark::NilPolicy policy; transforms.resize(cfgBenchmark5_Iterations); for(int i=0;i<transforms.size();++i) { transforms[i]=btDbvtBenchmark::RandTransform(cfgVolumeCenterScale*cfgBenchmark5_OffsetScale); } btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[0]); btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt[1]); dbvt[0].optimizeTopDown(); dbvt[1].optimizeTopDown(); printf("[5] btDbvt::collideTT xform: "); wallclock.reset(); for(int i=0;i<cfgBenchmark5_Iterations;++i) { btDbvt::collideTT(dbvt[0].m_root,dbvt[1].m_root,transforms[i],policy); } const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark5_Reference)*100/time); } if(cfgBenchmark6_Enable) {// Benchmark 6 srand(380843); btDbvt dbvt; btAlignedObjectArray<btTransform> transforms; btDbvtBenchmark::NilPolicy policy; transforms.resize(cfgBenchmark6_Iterations); for(int i=0;i<transforms.size();++i) { transforms[i]=btDbvtBenchmark::RandTransform(cfgVolumeCenterScale*cfgBenchmark6_OffsetScale); } btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); printf("[6] btDbvt::collideTT xform,self: "); wallclock.reset(); for(int i=0;i<cfgBenchmark6_Iterations;++i) { btDbvt::collideTT(dbvt.m_root,dbvt.m_root,transforms[i],policy); } 
const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark6_Reference)*100/time); } if(cfgBenchmark7_Enable) {// Benchmark 7 srand(380843); btDbvt dbvt; btAlignedObjectArray<btVector3> rayorg; btAlignedObjectArray<btVector3> raydir; btDbvtBenchmark::NilPolicy policy; rayorg.resize(cfgBenchmark7_Iterations); raydir.resize(cfgBenchmark7_Iterations); for(int i=0;i<rayorg.size();++i) { rayorg[i]=btDbvtBenchmark::RandVector3(cfgVolumeCenterScale*2); raydir[i]=btDbvtBenchmark::RandVector3(cfgVolumeCenterScale*2); } btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); printf("[7] btDbvt::rayTest: "); wallclock.reset(); for(int i=0;i<cfgBenchmark7_Passes;++i) { for(int j=0;j<cfgBenchmark7_Iterations;++j) { btDbvt::rayTest(dbvt.m_root,rayorg[j],rayorg[j]+raydir[j],policy); } } const int time=(int)wallclock.getTimeMilliseconds(); unsigned rays=cfgBenchmark7_Passes*cfgBenchmark7_Iterations; printf("%u ms (%i%%),(%u r/s)\r\n",time,(time-cfgBenchmark7_Reference)*100/time,(rays*1000)/time); } if(cfgBenchmark8_Enable) {// Benchmark 8 srand(380843); btDbvt dbvt; btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); printf("[8] insert/remove: "); wallclock.reset(); for(int i=0;i<cfgBenchmark8_Passes;++i) { for(int j=0;j<cfgBenchmark8_Iterations;++j) { dbvt.remove(dbvt.insert(btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale),0)); } } const int time=(int)wallclock.getTimeMilliseconds(); const int ir=cfgBenchmark8_Passes*cfgBenchmark8_Iterations; printf("%u ms (%i%%),(%u ir/s)\r\n",time,(time-cfgBenchmark8_Reference)*100/time,ir*1000/time); } if(cfgBenchmark9_Enable) {// Benchmark 9 srand(380843); btDbvt dbvt; btAlignedObjectArray<const btDbvtNode*> leaves; 
btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); dbvt.extractLeaves(dbvt.m_root,leaves); printf("[9] updates (teleport): "); wallclock.reset(); for(int i=0;i<cfgBenchmark9_Passes;++i) { for(int j=0;j<cfgBenchmark9_Iterations;++j) { dbvt.update(const_cast<btDbvtNode*>(leaves[rand()%cfgLeaves]), btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale)); } } const int time=(int)wallclock.getTimeMilliseconds(); const int up=cfgBenchmark9_Passes*cfgBenchmark9_Iterations; printf("%u ms (%i%%),(%u u/s)\r\n",time,(time-cfgBenchmark9_Reference)*100/time,up*1000/time); } if(cfgBenchmark10_Enable) {// Benchmark 10 srand(380843); btDbvt dbvt; btAlignedObjectArray<const btDbvtNode*> leaves; btAlignedObjectArray<btVector3> vectors; vectors.resize(cfgBenchmark10_Iterations); for(int i=0;i<vectors.size();++i) { vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1))*cfgBenchmark10_Scale; } btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); dbvt.extractLeaves(dbvt.m_root,leaves); printf("[10] updates (jitter): "); wallclock.reset(); for(int i=0;i<cfgBenchmark10_Passes;++i) { for(int j=0;j<cfgBenchmark10_Iterations;++j) { const btVector3& d=vectors[j]; btDbvtNode* l=const_cast<btDbvtNode*>(leaves[rand()%cfgLeaves]); btDbvtVolume v=btDbvtVolume::FromMM(l->volume.Mins()+d,l->volume.Maxs()+d); dbvt.update(l,v); } } const int time=(int)wallclock.getTimeMilliseconds(); const int up=cfgBenchmark10_Passes*cfgBenchmark10_Iterations; printf("%u ms (%i%%),(%u u/s)\r\n",time,(time-cfgBenchmark10_Reference)*100/time,up*1000/time); } if(cfgBenchmark11_Enable) {// Benchmark 11 srand(380843); btDbvt dbvt; btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); printf("[11] optimize (incremental): "); wallclock.reset(); for(int 
i=0;i<cfgBenchmark11_Passes;++i) { dbvt.optimizeIncremental(cfgBenchmark11_Iterations); } const int time=(int)wallclock.getTimeMilliseconds(); const int op=cfgBenchmark11_Passes*cfgBenchmark11_Iterations; printf("%u ms (%i%%),(%u o/s)\r\n",time,(time-cfgBenchmark11_Reference)*100/time,op/time*1000); } if(cfgBenchmark12_Enable) {// Benchmark 12 srand(380843); btAlignedObjectArray<btDbvtVolume> volumes; btAlignedObjectArray<bool> results; volumes.resize(cfgLeaves); results.resize(cfgLeaves); for(int i=0;i<cfgLeaves;++i) { volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale); } printf("[12] btDbvtVolume notequal: "); wallclock.reset(); for(int i=0;i<cfgBenchmark12_Iterations;++i) { for(int j=0;j<cfgLeaves;++j) { for(int k=0;k<cfgLeaves;++k) { results[k]=NotEqual(volumes[j],volumes[k]); } } } const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark12_Reference)*100/time); } if(cfgBenchmark13_Enable) {// Benchmark 13 srand(380843); btDbvt dbvt; btAlignedObjectArray<btVector3> vectors; btDbvtBenchmark::NilPolicy policy; vectors.resize(cfgBenchmark13_Iterations); for(int i=0;i<vectors.size();++i) { vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1)).normalized(); } btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); printf("[13] culling(OCL+fullsort): "); wallclock.reset(); for(int i=0;i<cfgBenchmark13_Iterations;++i) { static const btScalar offset=0; policy.m_depth=-SIMD_INFINITY; dbvt.collideOCL(dbvt.m_root,&vectors[i],&offset,vectors[i],1,policy); } const int time=(int)wallclock.getTimeMilliseconds(); const int t=cfgBenchmark13_Iterations; printf("%u ms (%i%%),(%u t/s)\r\n",time,(time-cfgBenchmark13_Reference)*100/time,(t*1000)/time); } if(cfgBenchmark14_Enable) {// Benchmark 14 srand(380843); btDbvt dbvt; btAlignedObjectArray<btVector3> vectors; btDbvtBenchmark::P14 policy; 
vectors.resize(cfgBenchmark14_Iterations); for(int i=0;i<vectors.size();++i) { vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1)).normalized(); } btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); policy.m_nodes.reserve(cfgLeaves); printf("[14] culling(OCL+qsort): "); wallclock.reset(); for(int i=0;i<cfgBenchmark14_Iterations;++i) { static const btScalar offset=0; policy.m_nodes.resize(0); dbvt.collideOCL(dbvt.m_root,&vectors[i],&offset,vectors[i],1,policy,false); policy.m_nodes.quickSort(btDbvtBenchmark::P14::sortfnc); } const int time=(int)wallclock.getTimeMilliseconds(); const int t=cfgBenchmark14_Iterations; printf("%u ms (%i%%),(%u t/s)\r\n",time,(time-cfgBenchmark14_Reference)*100/time,(t*1000)/time); } if(cfgBenchmark15_Enable) {// Benchmark 15 srand(380843); btDbvt dbvt; btAlignedObjectArray<btVector3> vectors; btDbvtBenchmark::P15 policy; vectors.resize(cfgBenchmark15_Iterations); for(int i=0;i<vectors.size();++i) { vectors[i]=(btDbvtBenchmark::RandVector3()*2-btVector3(1,1,1)).normalized(); } btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); dbvt.optimizeTopDown(); policy.m_nodes.reserve(cfgLeaves); printf("[15] culling(KDOP+qsort): "); wallclock.reset(); for(int i=0;i<cfgBenchmark15_Iterations;++i) { static const btScalar offset=0; policy.m_nodes.resize(0); policy.m_axis=vectors[i]; dbvt.collideKDOP(dbvt.m_root,&vectors[i],&offset,1,policy); policy.m_nodes.quickSort(btDbvtBenchmark::P15::sortfnc); } const int time=(int)wallclock.getTimeMilliseconds(); const int t=cfgBenchmark15_Iterations; printf("%u ms (%i%%),(%u t/s)\r\n",time,(time-cfgBenchmark15_Reference)*100/time,(t*1000)/time); } if(cfgBenchmark16_Enable) {// Benchmark 16 srand(380843); btDbvt dbvt; btAlignedObjectArray<btDbvtNode*> batch; btDbvtBenchmark::RandTree(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale,cfgLeaves,dbvt); 
dbvt.optimizeTopDown(); batch.reserve(cfgBenchmark16_BatchCount); printf("[16] insert/remove batch(%u): ",cfgBenchmark16_BatchCount); wallclock.reset(); for(int i=0;i<cfgBenchmark16_Passes;++i) { for(int j=0;j<cfgBenchmark16_BatchCount;++j) { batch.push_back(dbvt.insert(btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale),0)); } for(int j=0;j<cfgBenchmark16_BatchCount;++j) { dbvt.remove(batch[j]); } batch.resize(0); } const int time=(int)wallclock.getTimeMilliseconds(); const int ir=cfgBenchmark16_Passes*cfgBenchmark16_BatchCount; printf("%u ms (%i%%),(%u bir/s)\r\n",time,(time-cfgBenchmark16_Reference)*100/time,int(ir*1000.0/time)); } if(cfgBenchmark17_Enable) {// Benchmark 17 srand(380843); btAlignedObjectArray<btDbvtVolume> volumes; btAlignedObjectArray<int> results; btAlignedObjectArray<int> indices; volumes.resize(cfgLeaves); results.resize(cfgLeaves); indices.resize(cfgLeaves); for(int i=0;i<cfgLeaves;++i) { indices[i]=i; volumes[i]=btDbvtBenchmark::RandVolume(cfgVolumeCenterScale,cfgVolumeExentsBase,cfgVolumeExentsScale); } for(int i=0;i<cfgLeaves;++i) { btSwap(indices[i],indices[rand()%cfgLeaves]); } printf("[17] btDbvtVolume select: "); wallclock.reset(); for(int i=0;i<cfgBenchmark17_Iterations;++i) { for(int j=0;j<cfgLeaves;++j) { for(int k=0;k<cfgLeaves;++k) { const int idx=indices[k]; results[idx]=Select(volumes[idx],volumes[j],volumes[k]); } } } const int time=(int)wallclock.getTimeMilliseconds(); printf("%u ms (%i%%)\r\n",time,(time-cfgBenchmark17_Reference)*100/time); } printf("\r\n\r\n"); }
/// Host (CPU) fallback: sorts `inout` in place by m_key using a least-significant-digit
/// radix sort that consumes 8 key bits per pass.
///
/// Every pass builds a 256-entry histogram of the current digit, turns it into start
/// offsets with an exclusive prefix scan, and scatters the elements into the other buffer.
/// Elements with equal digits keep their relative order, which is what makes the
/// multi-pass scheme produce a fully sorted result.
///
/// @param inout     array of key/value pairs; holds the sorted sequence on return.
/// @param sortBits  number of low key bits to sort on (default 32). One pass is run for
///                  every started group of 8 bits. Values above the bit width of m_key
///                  are not supported (the digit shift would be out of range).
void btRadixSort32CL::executeHost(btAlignedObjectArray<btSortData>& inout, int sortBits /* = 32 */)
{
	const int n = inout.size();
	if (n == 0)
	{
		// Nothing to sort; also avoids taking the address of element 0 of an empty array.
		return;
	}

	const int BITS_PER_PASS = 8;
	const int NUM_TABLES = (1<<BITS_PER_PASS);

	// Per digit: first the occurrence count, after the prefix scan the next write position.
	int tables[NUM_TABLES];

	btSortData* src = &inout[0];
	btAlignedObjectArray<btSortData> workbuffer;
	workbuffer.resize(n);
	btSortData* dst = &workbuffer[0];

	int count=0;
	for(int startBit=0; startBit<sortBits; startBit+=BITS_PER_PASS)
	{
		// histogram of the current digit
		for(int i=0; i<NUM_TABLES; i++)
		{
			tables[i] = 0;
		}

		for(int i=0; i<n; i++)
		{
			int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1);
			tables[tableIdx]++;
		}

//#define TEST
#ifdef TEST
		printf("histogram size=%d\n",NUM_TABLES);
		for (int i=0;i<NUM_TABLES;i++)
		{
			if (tables[i]!=0)
			{
				printf("tables[%d]=%d]\n",i,tables[i]);
			}
		}
#endif //TEST

		// exclusive prefix scan: tables[d] becomes the first output slot for digit d
		int sum = 0;
		for(int i=0; i<NUM_TABLES; i++)
		{
			int iData = tables[i];
			tables[i] = sum;
			sum += iData;
		}

		// distribute: tables[d] doubles as the running write cursor for digit d
		for(int i=0; i<n; i++)
		{
			int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1);
			dst[tables[tableIdx]] = src[i];
			tables[tableIdx]++;
		}

		btSwap( src, dst );
		count++;
	}

	if (count&1)
	{
		// After an odd number of passes the most recent output lives in the temporary
		// work buffer (src points at it after the final swap); copy it back so the
		// caller actually receives the sorted data.
		for(int i=0; i<n; i++)
		{
			inout[i] = src[i];
		}
	}
}
// btRadixSort32CL::execute -- runs radix-sort passes over the key/value pairs in keyValuesInOut
// using OpenCL kernels, consuming 4 key bits per pass (the pass loop advances ib by 4 and the
// code asserts BITS_PER_PASS == 4, WG_SIZE == 64 and that sortBits is a multiple of 4).
//
// Setup:
//  - When the element count is not a multiple of DATA_ALIGNMENT, the input is copied into
//    m_workBuffer4, which is grown to the next multiple and whose tail is filled through m_fill
//    with key = value = 0xffffffff (USE_BTFILL is defined just above its #ifdef, so the host-side
//    fill loop is compiled out). Otherwise keyValuesInOut itself is the first pass source and
//    m_workBuffer4 is resized to 0.
//    NOTE(review): the 0xffffffff padding presumably sorts behind all real keys -- confirm.
//  - m_workBuffer1 (histogram) is resized to NUM_BUCKET*NUM_WGS and m_workBuffer3 (pass
//    destination) to the padded size.
//    NOTE(review): m_workBuffer2 is used as destHisto but is not resized in the code visible
//    here -- verify it is sized elsewhere.
//  - cdata carries n, the work-group count, the blocks per work-group and the current start bit
//    to the kernels; if there are fewer blocks than NUM_WGS, one block per work-group is used
//    and nWGs shrinks to nBlocks.
//
// Per pass:
//  1. m_streamCountSortDataKernel reads src and writes srcHisto.
//  2. Prefix scan: on _WIN32 m_prefixScanKernel runs on srcHisto and destHisto is then aliased
//     to srcHisto; elsewhere m_scan->execute scans srcHisto into destHisto over a hard-coded
//     1920 elements. NOTE(review): 1920 presumably equals NUM_BUCKET*NUM_WGS -- confirm.
//  3. m_sortAndScatterSortDataKernel reads src and destHisto and writes dst. USE_GPU is defined
//     immediately before its #ifdef, so the host-side #else branch (a CPU re-implementation of
//     the scatter that prints mismatches) is compiled out.
//  4. src/dst and srcHisto/destHisto are swapped and count is incremented.
//
// The DEBUG_RADIXSORT / DEBUG_RADIXSORT2 sections only copy buffers to the host and print them.
void btRadixSort32CL::execute(btOpenCLArray<btSortData>& keyValuesInOut, int sortBits /* = 32 */) { int originalSize = keyValuesInOut.size(); int workingSize = originalSize; int dataAlignment = DATA_ALIGNMENT; #ifdef DEBUG_RADIXSORT2 btAlignedObjectArray<btSortData> test2; keyValuesInOut.copyToHost(test2); printf("numElem = %d\n",test2.size()); for (int i=0;i<test2.size();i++) { printf("test2[%d].m_key=%d\n",i,test2[i].m_key); printf("test2[%d].m_value=%d\n",i,test2[i].m_value); } #endif //DEBUG_RADIXSORT2 btOpenCLArray<btSortData>* src = 0; if (workingSize%dataAlignment) { workingSize += dataAlignment-(workingSize%dataAlignment); m_workBuffer4->copyFromOpenCLArray(keyValuesInOut); m_workBuffer4->resize(workingSize); btSortData fillValue; fillValue.m_key = 0xffffffff; fillValue.m_value = 0xffffffff; #define USE_BTFILL #ifdef USE_BTFILL m_fill->execute((btOpenCLArray<btInt2>&)*m_workBuffer4,(btInt2&)fillValue,workingSize-originalSize,originalSize); #else //fill the remaining bits (very slow way, todo: fill on GPU/OpenCL side) for (int i=originalSize; i<workingSize;i++) { m_workBuffer4->copyFromHostPointer(&fillValue,1,i); } #endif//USE_BTFILL src = m_workBuffer4; } else { src = &keyValuesInOut; m_workBuffer4->resize(0); } btAssert( workingSize%DATA_ALIGNMENT == 0 ); int minCap = NUM_BUCKET*NUM_WGS; int n = workingSize; m_workBuffer1->resize(minCap); m_workBuffer3->resize(workingSize); // ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); btAssert( BITS_PER_PASS == 4 ); btAssert( WG_SIZE == 64 ); btAssert( (sortBits&0x3) == 0 ); btOpenCLArray<btSortData>* dst = m_workBuffer3; btOpenCLArray<unsigned int>* srcHisto = m_workBuffer1; btOpenCLArray<unsigned int>* destHisto = m_workBuffer2; int nWGs = NUM_WGS; btConstData cdata; { int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256 int nBlocks = (n+blockSize-1)/(blockSize); cdata.m_n = n; cdata.m_nWGs = NUM_WGS; cdata.m_startBit = 0; cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs; if( nBlocks < NUM_WGS ) { 
cdata.m_nBlocksPerWG = 1; nWGs = nBlocks; } } int count=0; for(int ib=0; ib<sortBits; ib+=4) { #ifdef DEBUG_RADIXSORT2 keyValuesInOut.copyToHost(test2); printf("numElem = %d\n",test2.size()); for (int i=0;i<test2.size();i++) { if (test2[i].m_key != test2[i].m_value) { printf("test2[%d].m_key=%d\n",i,test2[i].m_key); printf("test2[%d].m_value=%d\n",i,test2[i].m_value); } } #endif //DEBUG_RADIXSORT2 cdata.m_startBit = ib; { btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( srcHisto->getBufferCL() ) }; btLauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( cdata ); int num = NUM_WGS*WG_SIZE; launcher.launch1D( num, WG_SIZE ); } #ifdef DEBUG_RADIXSORT btAlignedObjectArray<unsigned int> testHist; srcHisto->copyToHost(testHist); printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size()); for (int i=0;i<testHist.size();i++) { if (testHist[i]!=0) printf("testHist[%d]=%d\n",i,testHist[i]); } #endif //DEBUG_RADIXSORT //fast prefix scan is not working properly on Mac OSX yet #ifdef _WIN32 bool fastScan=true; #else bool fastScan=false; #endif if (fastScan) {// prefix scan group histogram btBufferInfoCL bInfo[] = { btBufferInfoCL( srcHisto->getBufferCL() ) }; btLauncherCL launcher( m_commandQueue, m_prefixScanKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( 128, 128 ); destHisto = srcHisto; }else { //unsigned int sum; //for debugging m_scan->execute(*srcHisto,*destHisto,1920,0);//,&sum); } #ifdef DEBUG_RADIXSORT destHisto->copyToHost(testHist); printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size()); for (int i=0;i<testHist.size();i++) { if (testHist[i]!=0) printf("testHist[%d]=%d\n",i,testHist[i]); } for (int i=0;i<testHist.size();i+=NUM_WGS) { printf("testHist[%d]=%d\n",i/NUM_WGS,testHist[i]); } #endif //DEBUG_RADIXSORT 
#define USE_GPU #ifdef USE_GPU {// local sort and distribute btBufferInfoCL bInfo[] = { btBufferInfoCL( src->getBufferCL(), true ), btBufferInfoCL( destHisto->getBufferCL(), true ), btBufferInfoCL( dst->getBufferCL() )}; btLauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) ); launcher.setConst( cdata ); launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); } #else { #define NUM_TABLES 16 //#define SEQUENTIAL #ifdef SEQUENTIAL int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; int tables[NUM_TABLES]; int startBit = ib; destHisto->copyToHost(testHist); btAlignedObjectArray<btSortData> srcHost; btAlignedObjectArray<btSortData> dstHost; dstHost.resize(src->size()); src->copyToHost(srcHost); for (int i=0;i<NUM_TABLES;i++) { tables[i] = testHist[i*NUM_WGS]; } // distribute for(int i=0; i<n; i++) { int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1); dstHost[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; counter2[tableIdx] ++; } #else int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; int tables[NUM_TABLES]; btAlignedObjectArray<btSortData> dstHostOK; dstHostOK.resize(src->size()); destHisto->copyToHost(testHist); btAlignedObjectArray<btSortData> srcHost; src->copyToHost(srcHost); int blockSize = 256; int nBlocksPerWG = cdata.m_nBlocksPerWG; int startBit = ib; { for (int i=0;i<NUM_TABLES;i++) { tables[i] = testHist[i*NUM_WGS]; } // distribute for(int i=0; i<n; i++) { int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1); dstHostOK[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; counter2[tableIdx] ++; } } btAlignedObjectArray<btSortData> dstHost; dstHost.resize(src->size()); int counter[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; for (int wgIdx=0;wgIdx<NUM_WGS;wgIdx++) { int counter[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; for(int iblock=0; iblock<btMin(cdata.m_nBlocksPerWG, nBlocks); 
iblock++) { for (int lIdx = 0;lIdx < 64;lIdx++) { int addr = iblock*blockSize + blockSize*cdata.m_nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops // AMD: AtomInc performs better while NV prefers ++ for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++) { if( addr+j < n ) { // printf ("addr+j=%d\n", addr+j); int i = addr+j; int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1); int destIndex = testHist[tableIdx*NUM_WGS+wgIdx] + counter[tableIdx]; btSortData ok = dstHostOK[destIndex]; if (ok.m_key != srcHost[i].m_key) { printf("ok.m_key = %d, srcHost[i].m_key = %d\n", ok.m_key,srcHost[i].m_key ); printf("(ok.m_value = %d, srcHost[i].m_value = %d)\n", ok.m_value,srcHost[i].m_value ); } if (ok.m_value != srcHost[i].m_value) { printf("ok.m_value = %d, srcHost[i].m_value = %d\n", ok.m_value,srcHost[i].m_value ); printf("(ok.m_key = %d, srcHost[i].m_key = %d)\n", ok.m_key,srcHost[i].m_key ); } dstHost[destIndex] = srcHost[i]; counter[tableIdx] ++; } } } } } #endif //SEQUENTIAL dst->copyFromHost(dstHost); } #endif//USE_GPU #ifdef DEBUG_RADIXSORT destHisto->copyToHost(testHist); printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size()); for (int i=0;i<testHist.size();i++) { if (testHist[i]!=0) printf("testHist[%d]=%d\n",i,testHist[i]); } #endif //DEBUG_RADIXSORT btSwap(src, dst ); btSwap(srcHisto,destHisto); #ifdef DEBUG_RADIXSORT2 keyValuesInOut.copyToHost(test2); printf("numElem = %d\n",test2.size()); for (int i=0;i<test2.size();i++) { if (test2[i].m_key != test2[i].m_value) { printf("test2[%d].m_key=%d\n",i,test2[i].m_key); printf("test2[%d].m_value=%d\n",i,test2[i].m_value); } } #endif //DEBUG_RADIXSORT2 count++; }
bool btPolyhedralConvexShape::initializePolyhedralFeatures() { if (m_polyhedron) btAlignedFree(m_polyhedron); void* mem = btAlignedAlloc(sizeof(btConvexPolyhedron),16); m_polyhedron = new (mem) btConvexPolyhedron; btAlignedObjectArray<btVector3> tmpVertices; for (int i=0;i<getNumVertices();i++) { btVector3& newVertex = tmpVertices.expand(); getVertex(i,newVertex); } btConvexHullComputer conv; conv.compute(&tmpVertices[0].getX(), sizeof(btVector3),tmpVertices.size(),0.f,0.f); btAlignedObjectArray<btVector3> faceNormals; int numFaces = conv.faces.size(); faceNormals.resize(numFaces); btConvexHullComputer* convexUtil = &conv; m_polyhedron->m_faces.resize(numFaces); int numVertices = convexUtil->vertices.size(); m_polyhedron->m_vertices.resize(numVertices); for (int p=0;p<numVertices;p++) { m_polyhedron->m_vertices[p] = convexUtil->vertices[p]; } for (int i=0;i<numFaces;i++) { int face = convexUtil->faces[i]; //printf("face=%d\n",face); const btConvexHullComputer::Edge* firstEdge = &convexUtil->edges[face]; const btConvexHullComputer::Edge* edge = firstEdge; btVector3 edges[3]; int numEdges = 0; //compute face normals //btScalar maxCross2 = 0.f; //int chosenEdge = -1; do { int src = edge->getSourceVertex(); m_polyhedron->m_faces[i].m_indices.push_back(src); int targ = edge->getTargetVertex(); btVector3 wa = convexUtil->vertices[src]; btVector3 wb = convexUtil->vertices[targ]; btVector3 newEdge = wb-wa; newEdge.normalize(); if (numEdges<2) edges[numEdges++] = newEdge; edge = edge->getNextEdgeOfFace(); } while (edge!=firstEdge); btScalar planeEq = 1e30f; if (numEdges==2) { faceNormals[i] = edges[0].cross(edges[1]); faceNormals[i].normalize(); m_polyhedron->m_faces[i].m_plane[0] = -faceNormals[i].getX(); m_polyhedron->m_faces[i].m_plane[1] = -faceNormals[i].getY(); m_polyhedron->m_faces[i].m_plane[2] = -faceNormals[i].getZ(); m_polyhedron->m_faces[i].m_plane[3] = planeEq; } else { btAssert(0);//degenerate? 
faceNormals[i].setZero(); } for (int v=0;v<m_polyhedron->m_faces[i].m_indices.size();v++) { btScalar eq = m_polyhedron->m_vertices[m_polyhedron->m_faces[i].m_indices[v]].dot(faceNormals[i]); if (planeEq>eq) { planeEq=eq; } } m_polyhedron->m_faces[i].m_plane[3] = planeEq; } if (m_polyhedron->m_faces.size() && conv.vertices.size()) { for (int f=0;f<m_polyhedron->m_faces.size();f++) { btVector3 planeNormal(m_polyhedron->m_faces[f].m_plane[0],m_polyhedron->m_faces[f].m_plane[1],m_polyhedron->m_faces[f].m_plane[2]); btScalar planeEq = m_polyhedron->m_faces[f].m_plane[3]; btVector3 supVec = localGetSupportingVertex(-planeNormal); if (supVec.dot(planeNormal)<planeEq) { m_polyhedron->m_faces[f].m_plane[0] *= -1; m_polyhedron->m_faces[f].m_plane[1] *= -1; m_polyhedron->m_faces[f].m_plane[2] *= -1; m_polyhedron->m_faces[f].m_plane[3] *= -1; int numVerts = m_polyhedron->m_faces[f].m_indices.size(); for (int v=0;v<numVerts/2;v++) { btSwap(m_polyhedron->m_faces[f].m_indices[v],m_polyhedron->m_faces[f].m_indices[numVerts-1-v]); } } } } m_polyhedron->initialize(); return true; }