Пример #1
0
/*
 *  Run the contact solver pipeline on the device: build and sort a
 *  body/contact hash, compute split locations, body counts and split inverse
 *  masses, set up the contact constraints, iterate the solve / average
 *  kernels, then build a point/tetrahedron hash and call the velocity update.
 *
 *  numContacts : number of contacts; the function returns at once when it is 0
 *  contactBuf  : its device memory is passed to the kernels as ContactData
 *  pairBuf     : its device memory is passed to the kernels as the contact
 *                pairs (read both as uint * and as uint2 *)
 *  objectData  : cast, unchecked, to CudaNarrowphase::CombinedObjectBuffer *;
 *                supplies the pos, vel, mass, ind and cache-location buffers
 *
 *  The whole body is skipped when DISABLE_COLLISION_RESOLUTION is non-zero.
 */
void SimpleContactSolver::solveContacts(unsigned numContacts,
										CUDABuffer * contactBuf,
										CUDABuffer * pairBuf,
										void * objectData)
{
#if DISABLE_COLLISION_RESOLUTION
	return;
#endif
    if(numContacts < 1) return; 
    
	m_numContacts = numContacts;
// two hash entries per contact; iRound1024 presumably pads the length
// to a multiple of 1024 for the sort (confirm)
	const unsigned indBufLength = iRound1024(numContacts * 2);
	
// 8 per entry, presumably the byte size of a KeyValuePair (confirm)
	m_sortedInd[0]->create(indBufLength * 8);	
	m_sortedInd[1]->create(indBufLength * 8);
	
	void * bodyContactHash = m_sortedInd[0]->bufferOnDevice();
	void * pairs = pairBuf->bufferOnDevice();
	
// fills the hash from the pair buffer: numContacts * 2 entries within
// a buffer of indBufLength entries
	simpleContactSolverWriteContactIndex((KeyValuePair *)bodyContactHash, (uint *)pairs, numContacts * 2, indBufLength);
	
// m_sortedInd[1] is handed to the sort as its second buffer
// NOTE(review): 30 is presumably the number of key bits to sort - confirm
	void * tmp = m_sortedInd[1]->bufferOnDevice();
	RadixSort((KeyValuePair *)bodyContactHash, (KeyValuePair *)tmp, indBufLength, 30);
	
// numContacts * 8: accessed as uint2, presumably one entry per contact
	m_splitPair->create(numContacts * 8);
	void * splits = m_splitPair->bufferOnDevice();
	
	const unsigned splitBufLength = numContacts * 2;
	simpleContactSolverComputeSplitBufLoc((uint2 *)splits, 
	                        (uint2 *)pairs, 
	                        (KeyValuePair *)bodyContactHash, 
	                        splitBufLength);
	
// 4 per split entry, accessed as uint
	m_bodyCount->create(splitBufLength * 4);
	void * bodyCount = m_bodyCount->bufferOnDevice();
	simpleContactSolverCountBody((uint *)bodyCount, 
	                        (KeyValuePair *)bodyContactHash, 
	                        splitBufLength);
							
// the iteration count is mxcount + 3; max<int> presumably stores the largest
// bodyCount value in mxcount (bodyCount is a device pointer - confirm that
// max<int> expects one)
	int mxcount = 0;
	max<int>(mxcount, (int *)bodyCount, splitBufLength);
// if(mxcount>9) std::cout<<" max count per contact "<<mxcount; 
	int numiterations = mxcount + 3;
	
// 4 per split entry, accessed as float
	m_splitInverseMass->create(splitBufLength * 4);
	void * splitMass = m_splitInverseMass->bufferOnDevice();
	
// objectData is trusted to be a CombinedObjectBuffer; no check is made
	CudaNarrowphase::CombinedObjectBuffer * objectBuf = (CudaNarrowphase::CombinedObjectBuffer *)objectData;
	void * pos = objectBuf->m_pos->bufferOnDevice();
	void * vel = objectBuf->m_vel->bufferOnDevice();
	void * mass = objectBuf->m_mass->bufferOnDevice();
	void * ind = objectBuf->m_ind->bufferOnDevice();
	void * perObjPointStart = objectBuf->m_pointCacheLoc->bufferOnDevice();
	void * perObjectIndexStart = objectBuf->m_indexCacheLoc->bufferOnDevice();
	
	simpleContactSolverComputeSplitInverseMass((float *)splitMass,
	                        (uint2 *)splits,
	                        (uint2 *)pairs,
	                        (float *)mass,
	                        (uint4 *)ind,
	                        (uint * )perObjPointStart,
	                        (uint * )perObjectIndexStart,
                            (uint *)bodyCount,
                            splitBufLength);
	
// 64 per contact, presumably the byte size of a ContactConstraint (confirm)
	m_constraint->create(numContacts * 64);
	void * constraint = m_constraint->bufferOnDevice();
	
	void * contacts = contactBuf->bufferOnDevice();
	
	simpleContactSolverSetContactConstraint((ContactConstraint *)constraint,
	    (uint2 *)splits,
	    (uint2 *)pairs,
	    (float3 *)pos,
	    (float3 *)vel,
	    (uint4 *)ind,
        (uint * )perObjPointStart,
        (uint * )perObjectIndexStart,
        (float *)splitMass,
	    (ContactData *)contacts,
        numContacts * 2);
    CudaBase::CheckCudaError("jacobi solver set constraint");
	
// 12 per split entry (accessed as float3), size passed through nextPow2
	m_deltaLinearVelocity->create(nextPow2(splitBufLength * 12));
	m_deltaAngularVelocity->create(nextPow2(splitBufLength * 12));
	
	void * deltaLinVel = m_deltaLinearVelocity->bufferOnDevice();
	void * deltaAngVel = m_deltaAngularVelocity->bufferOnDevice();
	simpleContactSolverClearDeltaVelocity((float3 *)deltaLinVel, 
	                            (float3 *)deltaAngVel, 
	                            splitBufLength);
	
// disabled: exclusive scan of the body counts, kept for reference
	/*
	const unsigned scanBufLength = iRound1024(numContacts * 2);
	m_bodyCount->create(scanBufLength * 4);
	m_scanBodyCount[0]->create(scanBufLength * 4);
	m_scanBodyCount[1]->create(scanBufLength * 4);
	
	
	void * scanResult = m_scanBodyCount[0]->bufferOnDevice();
	void * scanIntermediate = m_scanBodyCount[1]->bufferOnDevice();
	scanExclusive((uint *)scanResult, (uint *)bodyCount, (uint *)scanIntermediate, scanBufLength / 1024, 1024);
	
	const unsigned numSplitBodies = ScanUtil::getScanResult(m_bodyCount, m_scanBodyCount[0], scanBufLength);
	*/
	
// each iteration runs the per-contact solve kernel followed by the
// averaging kernel, checking for a CUDA error after each one
	int i;
	for(i=0; i< numiterations; i++) {
// compute impulse and velocity changes per contact
        simpleContactSolverSolveContactWoJ((ContactConstraint *)constraint,
	                    (float3 *)deltaLinVel,
	                    (float3 *)deltaAngVel,
	                    (uint2 *)pairs,
	                    (uint2 *)splits,
	                    (float *)splitMass,
	                    (ContactData *)contacts,
	                    (float3 *)pos,
	                    (float3 *)vel,
	                    (uint4 *)ind,
	                    (uint * )perObjPointStart,
	                    (uint * )perObjectIndexStart,
	                    numContacts * 2);
        CudaBase::CheckCudaError("jacobi solver solve impulse");
    
	    simpleContactSolverAverageVelocities((float3 *)deltaLinVel,
                        (float3 *)deltaAngVel,
                        (uint *)bodyCount,
                        (KeyValuePair *)bodyContactHash, 
                        splitBufLength);
        CudaBase::CheckCudaError("jacobi solver average velocity");
	}
	
// 2 tet per contact, 4 pnt per tet, key is pnt index, value is tet index in split
	const unsigned pntHashBufLength = iRound1024(numContacts * 2 * 4);
    // std::cout<<"\n pntHashBufLength"<<pntHashBufLength
    // <<" numContact"<<numContacts;
	m_pntTetHash[0]->create(pntHashBufLength * 8);
	m_pntTetHash[1]->create(pntHashBufLength * 8);
	
	void * pntTetHash = m_pntTetHash[0]->bufferOnDevice();
	
	simpleContactSolverWritePointTetHash((KeyValuePair *)pntTetHash,
	                (uint2 *)pairs,
	                (uint2 *)splits,
	                (uint *)bodyCount,
	                (uint4 *)ind,
	                (uint * )perObjPointStart,
	                (uint * )perObjectIndexStart,
	                numContacts * 2,
	                pntHashBufLength);
// this check is given the result of CudaBase::Synchronize(), unlike the
// label-only checks above
    CudaBase::CheckCudaError(CudaBase::Synchronize(),
                             "jacobi solver point-tetra hash");
    
// NOTE(review): 24 is presumably the number of key bits to sort - confirm
	void * intermediate = m_pntTetHash[1]->bufferOnDevice();
	RadixSort((KeyValuePair *)pntTetHash, (KeyValuePair *)intermediate, pntHashBufLength, 24);

#if 0
    svlg.writeHash(m_pntTetHash[1], numContacts * 2, 
                   "pnttet_hash", CudaDbgLog::FAlways);
#endif
    
// vel is passed first; the element count numContacts * 2 * 4 matches the
// "2 tet per contact, 4 pnt per tet" layout of the hash
	simpleContactSolverUpdateVelocity((float3 *)vel,
	                (float3 *)deltaLinVel,
	                (float3 *)deltaAngVel,
	                (KeyValuePair *)pntTetHash,
                    (uint2 *)pairs,
                    (uint2 *)splits,
                    (ContactConstraint *)constraint,
                    (ContactData *)contacts,
                    (float3 *)pos,
                    (uint4 *)ind,
                    (uint * )perObjPointStart,
                    (uint * )perObjectIndexStart,
                    numContacts * 2 * 4);
    CudaBase::CheckCudaError(CudaBase::Synchronize(),
        "jacobi solver update velocity");
}
Пример #2
0
void CudaBroadphase::computeOverlappingPairs()
{
#if DISABLE_COLLISION_DETECTION
	return;
#endif	
	if(m_numObjects < 1) return;
	unsigned i, j;
	
	resetPairCounts();
	
	for(j = 0; j<m_numObjects; j++) {
		for(i = j+1; i<m_numObjects; i++) {
			countOverlappingPairs(j, i);
		}
	}
	
	for(j = 0; j<m_numObjects; j++) {
		countOverlappingPairs(j, j);
	}
	
	m_pairCacheLength = m_scanIntermediate->prefixSum(m_pairStart, m_pairCounts, m_scanBufferLength);
	
	if(m_pairCacheLength < 1) return;
#if 0
	bphlg.writeUInt(m_pairCounts,
         m_numBoxes,
                "overlapping_counts", CudaDbgLog::FAlways);
#endif
#if 0
    bphlg.writeUInt(m_pairStart,
         m_numBoxes,
                "overlapping_offsets", CudaDbgLog::FAlways);
	std::cout<<" overlapping pair cache length "<<m_pairCacheLength<<"\n";
#endif	
	setWriteLocation();
#if 0	
	bphlg.writeUInt(m_pairWriteLocation,
         m_numBoxes,
                "overlapping_write_location0", CudaDbgLog::FAlways);
#endif
	m_pairCache->create(m_pairCacheLength * 8);
	
	void * cache = m_pairCache->bufferOnDevice();
	broadphaseResetPairCache((uint2 *)cache, m_pairCacheLength);
	
	for(j = 0; j<m_numObjects; j++) {
		for(i = j+1; i<m_numObjects; i++) {
			writeOverlappingPairs(j, i);
		}
	}
	
	for(j = 0; j<m_numObjects; j++) {
		writeOverlappingPairs(j, j);
	}
#if 0
	bphlg.writeUInt(m_pairWriteLocation,
         m_numBoxes,
                "overlapping_write_location1", CudaDbgLog::FAlways);
#endif
#if 0    
    bphlg.writeHash(m_pairCache,
         m_pairCacheLength,
                "overlapping_pairs", CudaDbgLog::FAlways);
#endif
}
Пример #3
0
/*
 *  Advance the solver by one step of length dt.
 *
 *  Call order: computeForces, clearStiffnessAssembly, orientation update or
 *  reset (depending on bUseStiffnessWarping), stiffnessAssembly,
 *  dynamicsAssembly(dt), the solve for m_V (solveGpu when SOLVEONGPU is
 *  non-zero, solve otherwise), and finally updatePosition(dt).
 *
 *  With ENABLE_DBG set, the per-tetrahedron Re, the stiffness matrix values,
 *  the right-hand side and m_F0 are written to dbglg after the step.
 */
void SolverThread::stepPhysics(float dt)
{
	computeForces();
	clearStiffnessAssembly();

	if(!bUseStiffnessWarping) {
		resetOrientation();
	}
	else {
		updateOrientation();
	}

	stiffnessAssembly();

	// addPlasticityForce(dt);

	dynamicsAssembly(dt);

#if SOLVEONGPU
	solveGpu(m_V, m_stiffnessMatrix);
#else
	solve(m_V);
#endif

	updatePosition(dt);

#if ENABLE_DBG
	// per-tetrahedron Re
	dbglg.write("Re");
	unsigned tetCount = m_mesh->numTetrahedra();
	FEMTetrahedronMesh::Tetrahedron * tets = m_mesh->tetrahedra();
	for(unsigned t = 0; t < tetCount; ++t) {
		dbglg.write(t);
		dbglg.write(tets[t].Re.str());
	}

	// non-zero values of the stiffness matrix
	dbglg.writeMat33(m_stiffnessMatrix->valueBuf(),
	    m_stiffnessMatrix->numNonZero(),
	    "K ");

	// per-point right-hand side
	dbglg.write("Rhs");
	unsigned pointCount = m_mesh->numPoints();
	for(unsigned p = 0; p < pointCount; ++p) {
		dbglg.write(p);
		dbglg.write(rightHandSide()[p].str());
		dbglg.newLine();
	}

	// per-point m_F0
	dbglg.write("F0");
	for(unsigned p = 0; p < pointCount; ++p) {
		dbglg.write(p);
		dbglg.write(m_F0[p].str());
		dbglg.newLine();
	}
#endif
}
Пример #4
0
void SolverThread::calculateK()
{
#if ENABLE_DBG
    dbglg.write("Ke");
#endif
    unsigned totalTetrahedra = m_mesh->numTetrahedra();
    Vector3F * Xi = m_mesh->Xi();
    FEMTetrahedronMesh::Tetrahedron * tetrahedra = m_mesh->tetrahedra();
    
    for(unsigned k=0;k<totalTetrahedra;k++) {
		
		Vector3F x0 = Xi[tetrahedra[k].indices[0]];
		Vector3F x1 = Xi[tetrahedra[k].indices[1]];
		Vector3F x2 = Xi[tetrahedra[k].indices[2]];
		Vector3F x3 = Xi[tetrahedra[k].indices[3]];
		
		//For this check page no.: 344-346 of Kenny Erleben's book Physics based Animation
		//Eq. 10.30(a-c)
		Vector3F e10 = x1-x0;
		Vector3F e20 = x2-x0;
		Vector3F e30 = x3-x0;

		// tetrahedra[k].e1 = e10;
		// tetrahedra[k].e2 = e20;
		// tetrahedra[k].e3 = e30;

		tetrahedra[k].volume= FEMTetrahedronMesh::getTetraVolume(e10,e20,e30);
		
		//Eq. 10.32
		Matrix33F E; 
		E.fill(e10, e20, e30);
		
		float detE = E.determinant(); if(detE ==0.f) std::cout<<" zero det "<<E.str()<<"\n";
		float invDetE = 1.0f/detE;	
		
		//Eq. 10.40 (a) & Eq. 10.42 (a)
		//Shape function derivatives wrt x,y,z
		// d/dx N0
		float invE10 = (e20.z*e30.y - e20.y*e30.z)*invDetE;
		float invE20 = (e10.y*e30.z - e10.z*e30.y)*invDetE;
		float invE30 = (e10.z*e20.y - e10.y*e20.z)*invDetE;
		float invE00 = -invE10-invE20-invE30;

		//Eq. 10.40 (b) & Eq. 10.42 (b)
		// d/dy N0
		float invE11 = (e20.x*e30.z - e20.z*e30.x)*invDetE;
		float invE21 = (e10.z*e30.x - e10.x*e30.z)*invDetE;
		float invE31 = (e10.x*e20.z - e10.z*e20.x)*invDetE;
		float invE01 = -invE11-invE21-invE31;

		//Eq. 10.40 (c) & Eq. 10.42 (c)
		// d/dz N0
		float invE12 = (e20.y*e30.x - e20.x*e30.y)*invDetE;
		float invE22 = (e10.x*e30.y - e10.y*e30.x)*invDetE;
		float invE32 = (e10.y*e20.x - e10.x*e20.y)*invDetE;
		float invE02 = -invE12-invE22-invE32;

		//Eq. 10.43 
		//Bn ~ [bn cn dn]^T
		// bn = d/dx N0 = [ invE00 invE10 invE20 invE30 ]
		// cn = d/dy N0 = [ invE01 invE11 invE21 invE31 ]
		// dn = d/dz N0 = [ invE02 invE12 invE22 invE32 ]
		tetrahedra[k].B[0] = Vector3F(invE00, invE01, invE02);		
		tetrahedra[k].B[1] = Vector3F(invE10, invE11, invE12);		
		tetrahedra[k].B[2] = Vector3F(invE20, invE21, invE22);		
		tetrahedra[k].B[3] = Vector3F(invE30, invE31, invE32);
		
		// std::cout<<"B[0] "<<tetrahedra[k].B[0]<<"\n";
		// std::cout<<"B[1] "<<tetrahedra[k].B[1]<<"\n";
		// std::cout<<"B[2] "<<tetrahedra[k].B[2]<<"\n";
		// std::cout<<"B[3] "<<tetrahedra[k].B[3]<<"\n";
 
		for(unsigned i=0;i<4;i++) {
			for(unsigned j=0;j<4;j++) {
				Matrix33F & Ke = tetrahedra[k].Ke[i][j];
				float d19 = tetrahedra[k].B[i].x;
				float d20 = tetrahedra[k].B[i].y;
				float d21 = tetrahedra[k].B[i].z;
				float d22 = tetrahedra[k].B[j].x;
				float d23 = tetrahedra[k].B[j].y;
				float d24 = tetrahedra[k].B[j].z;
				*Ke.m(0, 0)= d16 * d19 * d22 + d18 * (d20 * d23 + d21 * d24);
				*Ke.m(0, 1)= d17 * d19 * d23 + d18 * (d20 * d22);
				*Ke.m(0, 2)= d17 * d19 * d24 + d18 * (d21 * d22);

				*Ke.m(1, 0)= d17 * d20 * d22 + d18 * (d19 * d23);
				*Ke.m(1, 1)= d16 * d20 * d23 + d18 * (d19 * d22 + d21 * d24);
				*Ke.m(1, 2)= d17 * d20 * d24 + d18 * (d21 * d23);

				*Ke.m(2, 0)= d17 * d21 * d22 + d18 * (d19 * d24);
				*Ke.m(2, 1)= d17 * d21 * d23 + d18 * (d20 * d24);
				*Ke.m(2, 2)= d16 * d21 * d24 + d18 * (d20 * d23 + d19 * d22);

				Ke *= tetrahedra[k].volume;
#if ENABLE_DBG				
				dbglg.write("kij");
				dbglg.write(k);
				dbglg.write(i);
				dbglg.write(j);
				dbglg.write(Ke.str());
#endif
			}
		}
 	}
}
Пример #5
0
/*
 *  Run the contact solver pipeline on the device: build and sort a
 *  body/contact hash, compute split locations, body counts and split inverse
 *  masses, prepare the contact constraints, iterate the collision / friction
 *  kernels (each followed by the averaging kernel), then build a
 *  point/tetrahedron hash and call the impulse update.
 *
 *  numContacts : number of contacts; the function returns at once when it is 0
 *  contactBuf  : its device memory is passed to the kernels as ContactData
 *  pairBuf     : its device memory is passed to the kernels as the contact
 *                pairs (read both as uint * and as uint2 *)
 *  objectData  : cast, unchecked, to CudaNarrowphase::CombinedObjectBuffer *;
 *                supplies the pos, vel, mass, linear impulse, ind and
 *                cache-location buffers
 *
 *  The whole body is skipped when DISABLE_COLLISION_RESOLUTION is non-zero.
 */
void SimpleContactSolver::solveContacts(unsigned numContacts,
										CUDABuffer * contactBuf,
										CUDABuffer * pairBuf,
										void * objectData)
{
#if DISABLE_COLLISION_RESOLUTION
	return;
#endif
    if(numContacts < 1) return; 
    
#if 0
    svlg.writeInt2( pairBuf,
                    numContacts,
                   "pair", CudaDbgLog::FAlways);
#endif
    
// two hash entries per contact; iRound1024 presumably pads the length
// to a multiple of 1024 for the sort (confirm)
	const unsigned indBufLength = iRound1024(numContacts * 2);
	
// 8 per entry, presumably the byte size of a KeyValuePair (confirm)
	m_sortedInd[0]->create(indBufLength * 8);	
	m_sortedInd[1]->create(indBufLength * 8);
	
	void * bodyContactHash = m_sortedInd[0]->bufferOnDevice();
	void * pairs = pairBuf->bufferOnDevice();
	
/*  
 *  for either side of each contact pair, set
 *  key: body index
 *  value: contact index
 *  n x 2 hash
 *  sort by body index to put the same body together 
 */
	simpleContactSolverWriteContactIndex((KeyValuePair *)bodyContactHash, 
	    (uint *)pairs, 
	    numContacts * 2, 
	    indBufLength);
	
// m_sortedInd[1] is handed to the sort as its second buffer
// NOTE(review): 30 is presumably the number of key bits to sort - confirm
	void * tmp = m_sortedInd[1]->bufferOnDevice();
	RadixSort((KeyValuePair *)bodyContactHash, (KeyValuePair *)tmp, indBufLength, 30);

#if 0
    svlg.writeHash( m_sortedInd[0],
                    numContacts * 2,
                   "body-contact", CudaDbgLog::FAlways);
#endif

/*
 *  for each hash, find the index of contact pair
 *  set the indirection from contact pair to hash index
 */
	m_splitPair->create(numContacts * 8);
	void * splits = m_splitPair->bufferOnDevice();
	
	const unsigned splitBufLength = numContacts * 2;
	simpleContactSolverComputeSplitBufLoc((uint2 *)splits, 
	                        (uint2 *)pairs, 
	                        (KeyValuePair *)bodyContactHash, 
	                        splitBufLength);

#if 0
    svlg.writeInt2( m_splitPair,
                    numContacts,
                   "splitpair", CudaDbgLog::FAlways);
#endif
	
// 4 per split entry, accessed as uint
	m_bodyCount->create(splitBufLength * 4);
	void * bodyCount = m_bodyCount->bufferOnDevice();
	simpleContactSolverCountBody((uint *)bodyCount, 
	                        (KeyValuePair *)bodyContactHash, 
	                        splitBufLength);

// the count-based iteration number is disabled; a fixed 9 iterations is used
#if 0
// num iterations by max contacts per object
// todo ignore static object count
	int mxcount = 0;
	max<int>(mxcount, (int *)bodyCount, splitBufLength);
	int numiterations = mxcount + 3;
#else
	int numiterations = 9;
#endif
	
// 4 per split entry, accessed as float
	m_splitInverseMass->create(splitBufLength * 4);
	void * splitMass = m_splitInverseMass->bufferOnDevice();
	
// objectData is trusted to be a CombinedObjectBuffer; no check is made
	CudaNarrowphase::CombinedObjectBuffer * objectBuf = (CudaNarrowphase::CombinedObjectBuffer *)objectData;
	void * pos = objectBuf->m_pos->bufferOnDevice();
	void * vel = objectBuf->m_vel->bufferOnDevice();
	void * mass = objectBuf->m_mass->bufferOnDevice();
    void * linearImpulse = objectBuf->m_linearImpulse->bufferOnDevice();
	void * ind = objectBuf->m_ind->bufferOnDevice();
	void * perObjPointStart = objectBuf->m_pointCacheLoc->bufferOnDevice();
	void * perObjectIndexStart = objectBuf->m_indexCacheLoc->bufferOnDevice();
// 4 * 4 per split entry, accessed as uint4
	m_bodyTetInd->create(4* 4 * numContacts *2);
	
// m_bodyTetInd is passed here and reused by the constraint setup and the
// point/tetrahedron hash below
	simpleContactSolverComputeSplitInverseMass((float *)splitMass,
	                        (uint2 *)splits,
	                        (uint2 *)pairs,
	                        (float *)mass,
	                        (uint4 *)ind,
	                        (uint * )perObjPointStart,
	                        (uint * )perObjectIndexStart,
                            (uint *)bodyCount,
                            (uint4 *)m_bodyTetInd->bufferOnDevice(),
                            numContacts * 2);
    
#if 0
   // svlg.writeFlt( m_splitInverseMass,
     //               numContacts,
       //            "masstensor", CudaDbgLog::FAlways);
    
    svlg.writeUInt( objectBuf->m_pointCacheLoc,
                    2,
                   "pstart", CudaDbgLog::FAlways);
    svlg.writeUInt( objectBuf->m_indexCacheLoc,
                    2,
                   "istart", CudaDbgLog::FAlways);
#endif
	
// 64 per contact, presumably the byte size of a ContactConstraint (confirm);
// 12 per split entry for the contact velocities, accessed as float3
    m_constraint->create(numContacts * 64);
	m_contactLinearVelocity->create(numContacts * 2 * 12);
	void * constraint = m_constraint->bufferOnDevice();
	void * contactLinearVel = m_contactLinearVelocity->bufferOnDevice();
	void * contacts = contactBuf->bufferOnDevice();
	
	contactconstraint::prepareNoPenetratingContact((ContactConstraint *)constraint,
	    (float3 *)contactLinearVel,
	    (uint2 *)splits,
	    (uint2 *)pairs,
	    (float3 *)pos,
	    (float3 *)vel,
        (float3 *)linearImpulse,
	    (float *)splitMass,
	    (ContactData *)contacts,
	    (uint4 *)m_bodyTetInd->bufferOnDevice(),
        numContacts * 2);
    CudaBase::CheckCudaError("jacobi solver prepare constraint");
#if 0
    svlg.writeUInt( m_bodyTetInd,
                    numContacts * 8,
                   "tet", CudaDbgLog::FAlways);
#endif
#if 0
    
    svlg.writeFlt( contactBuf,
                    numContacts * 12,
                   "contact", CudaDbgLog::FAlways);
#endif

#if 0
    svlg.writeStruct(m_constraint, numContacts, 
                   "constraint", 
                   constraintDesc,
                   64,
                   CudaDbgLog::FAlways);
   // svlg.writeVec3(m_contactLinearVelocity, numContacts * 2, 
     //              "contact_vel", CudaDbgLog::FAlways);
#endif

// 12 per split entry (accessed as float3), size passed through nextPow2
	m_deltaLinearVelocity->create(nextPow2(splitBufLength * 12));
	m_deltaAngularVelocity->create(nextPow2(splitBufLength * 12));
	
	void * deltaLinVel = m_deltaLinearVelocity->bufferOnDevice();
	void * deltaAngVel = m_deltaAngularVelocity->bufferOnDevice();
	simpleContactSolverClearDeltaVelocity((float3 *)deltaLinVel, 
	                            (float3 *)deltaAngVel, 
	                            splitBufLength);

// each iteration: resolve collision, average, resolve friction, average,
// with a CUDA error check after every kernel
	int i;
	for(i=0; i< numiterations; i++) {
// compute impulse and velocity changes per contact
        collisionres::resolveCollision((ContactConstraint *)constraint,
	                    (float3 *)contactLinearVel,
                        (float3 *)deltaLinVel,
	                    (uint2 *)pairs,
	                    (uint2 *)splits,
	                    (float *)splitMass,
	                    (ContactData *)contacts,
	                    numContacts * 2);
        CudaBase::CheckCudaError("jacobi solver resolve collision");
        
#if 0
    unsigned ii = i;
    svlg.write(ii);
#endif
#if 0
    svlg.writeVec3(m_deltaLinearVelocity, numContacts * 2, 
                   "deltaV_b4", CudaDbgLog::FAlways);
#endif
    
	    simpleContactSolverAverageVelocities((float3 *)deltaLinVel,
                        (float3 *)deltaAngVel,
                        (uint *)bodyCount,
                        (KeyValuePair *)bodyContactHash, 
                        splitBufLength);
        CudaBase::CheckCudaError("jacobi solver average velocity");
        
#if 0
    svlg.writeVec3(m_deltaLinearVelocity, numContacts * 2, 
                   "deltaV_avg", CudaDbgLog::FAlways);
#endif

        collisionres::resolveFriction((ContactConstraint *)constraint,
	                    (float3 *)contactLinearVel,
                        (float3 *)deltaLinVel,
	                    (uint2 *)pairs,
	                    (uint2 *)splits,
	                    (float *)splitMass,
	                    (ContactData *)contacts,
	                    numContacts * 2);
        CudaBase::CheckCudaError("jacobi solver resolve friction");
        
        simpleContactSolverAverageVelocities((float3 *)deltaLinVel,
                        (float3 *)deltaAngVel,
                        (uint *)bodyCount,
                        (KeyValuePair *)bodyContactHash, 
                        splitBufLength);
        CudaBase::CheckCudaError("jacobi solver average velocity");

	}

// 2 tet per contact, 4 pnt per tet, key is pnt index, value is tet index in split
	const unsigned pntHashBufLength = iRound1024(numContacts * 2 * 4);
    // std::cout<<"\n pntHashBufLength"<<pntHashBufLength
    // <<" numContact"<<numContacts;
	m_pntTetHash[0]->create(pntHashBufLength * 8);
	m_pntTetHash[1]->create(pntHashBufLength * 8);
	
	void * pntTetHash = m_pntTetHash[0]->bufferOnDevice();
	
	simpleContactSolverWritePointTetHash((KeyValuePair *)pntTetHash,
	                (uint2 *)pairs,
	                (uint2 *)splits,
	                (uint *)bodyCount,
	                (uint4 *)m_bodyTetInd->bufferOnDevice(),
	                numContacts * 2,
	                pntHashBufLength);
    CudaBase::CheckCudaError(// CudaBase::Synchronize(),
                             "jacobi solver point-tetra hash");
    
// NOTE(review): 24 is presumably the number of key bits to sort - confirm
	void * intermediate = m_pntTetHash[1]->bufferOnDevice();
	RadixSort((KeyValuePair *)pntTetHash, (KeyValuePair *)intermediate, pntHashBufLength, 24);

#if 0
    svlg.writeHash(m_pntTetHash[1], numContacts * 2, 
                   "pnttet_hash", CudaDbgLog::FAlways);
#endif
    
// linearImpulse is passed first; the element count numContacts * 2 * 4
// matches the "2 tet per contact, 4 pnt per tet" layout of the hash
    contactsolver::updateImpulse((float3 *)linearImpulse,
	                (float3 *)deltaLinVel,
	                (float3 *)deltaAngVel,
	                (KeyValuePair *)pntTetHash,
                    (uint2 *)pairs,
                    (uint2 *)splits,
                    (ContactConstraint *)constraint,
                    (ContactData *)contacts,
                    (float3 *)pos,
                    (uint4 *)ind,
                    (uint * )perObjPointStart,
                    (uint * )perObjectIndexStart,
                    numContacts * 2 * 4);
// the label names the kernel actually launched above (updateImpulse), so a
// reported error points at the right stage
    CudaBase::CheckCudaError(// CudaBase::Synchronize(),
        "jacobi solver update impulse");
}