// SIMD ray-versus-axis-aligned-box overlap test.
//
// minBox / maxBox: the two opposite corners of the box; both are reinterpreted
// as simd_128 values, so they must be laid out compatibly with that type.
//
// Returns 0 when the ray is rejected, otherwise bit 0 of the (t0 < t1)
// comparison mask, i.e. 1 when the clipped parametric interval is non-empty.
dgInt32 FastRayTest::BoxTestSimd (const dgVector& minBox, const dgVector& maxBox) const
{
	simd_128 boxP0 ((simd_128&)minBox);
	simd_128 boxP1 ((simd_128&)maxBox);

	// Early out: on any of the first three lanes (mask 0x07) that is flagged in
	// m_isParallel, the ray origin must lie strictly inside (minBox, maxBox).
	// NOTE(review): m_isParallel presumably marks axes along which the ray
	// direction is (near) zero -- confirm against where it is initialised.
	// The two comparisons must be OR-ed together *before* masking: '&' binds
	// tighter than '|', so without the extra parentheses the mask would only be
	// applied to the '>= boxP1' term and the '<= boxP0' term would reject rays
	// on axes that are not parallel at all.
	simd_128 tt0 ((((simd_128&)m_p0 <= boxP0) | ((simd_128&)m_p0 >= boxP1)) & (simd_128&)m_isParallel);
	if (tt0.GetSignMask() & 0x07) {
		return 0;
	}

	// Per-lane parametric distances to the two box planes of each axis.
	tt0 = (boxP0 - (simd_128&)m_p0) * (simd_128&)m_dpInv;
	simd_128 tt1 ((boxP1 - (simd_128&)m_p0) * (simd_128&)m_dpInv);

	// Clamp the per-axis entry/exit parameters against m_minT / m_maxT.
	simd_128 t0 (((simd_128&)m_minT).GetMax(tt0.GetMin(tt1)));
	simd_128 t1 (((simd_128&)m_maxT).GetMin(tt0.GetMax(tt1)));

	// Two shift-and-combine steps reduce the three lanes so that lane 0 holds
	// the largest entry and the smallest exit parameter.
	// NOTE(review): relies on ShiftTripleRight rotating the x/y/z lanes -- confirm.
	t0 = t0.GetMax(t0.ShiftTripleRight());
	t1 = t1.GetMin(t1.ShiftTripleRight());
	t0 = t0.GetMax(t0.ShiftTripleRight());
	t1 = t1.GetMin(t1.ShiftTripleRight());

	// Only lane 0 of the comparison is reported.
	return ((t0 < t1).GetSignMask() & 1);
}
// Builds the convex hull collision data from a strided array of points.
//
// count          number of input points
// strideInBytes  distance in bytes between consecutive points in vertexArray
// vertexArray    input points; the first three dgFloat32 of each point are read
// tolerance      forwarded unchanged to the dgConvexHull3d constructor
//
// Steps performed:
//   1. copy the points into a double precision buffer and build a dgConvexHull3d;
//   2. if that hull has no faces, thicken the point set along the thinnest axis
//      of its oriented bounding box and try again;
//   3. repeatedly drop one vertex of every near zero-area face and rebuild the
//      hull until no such face is left;
//   4. convert the faces into a dgPolyhedra, merge coplanar edges, and fill
//      m_vertex, m_simplex, m_vertexToEdgeMapping and m_faceArray;
//   5. for hulls with more than DG_CONVEX_VERTEX_CHUNK_SIZE vertices, build the
//      m_supportTree box hierarchy and reorder the vertices to match it;
//   6. call SetVolumeAndCG().
//
// Returns false when no hull with at least four vertices can be built,
// true otherwise.
//
// NOTE(review): m_vertexCount and m_edgeCount are incremented from their current
// values, so they are presumably zero on entry -- confirm against the constructor.
bool dgCollisionConvexHull::Create (dgInt32 count, dgInt32 strideInBytes, const dgFloat32* const vertexArray, dgFloat32 tolerance)
{
	dgInt32 stride = strideInBytes / sizeof (dgFloat32);

	// room for twice the input points: the degenerate path below doubles the set
	dgStack<dgFloat64> buffer(3 * 2 * count);
	for (dgInt32 i = 0; i < count; i ++) {
		buffer[i * 3 + 0] = vertexArray[i * stride + 0];
		buffer[i * 3 + 1] = vertexArray[i * stride + 1];
		buffer[i * 3 + 2] = vertexArray[i * stride + 2];
	}

	dgConvexHull3d* convexHull =  new (GetAllocator()) dgConvexHull3d (GetAllocator(), &buffer[0], 3 * sizeof (dgFloat64), count, tolerance);
	if (!convexHull->GetCount()) {
		// this is a degenerate hull (no faces): add some thickness to the point
		// set so that it becomes a thick plane, then build the hull again
		delete convexHull;

		dgStack<dgVector> tmp(3 * count);
		for (dgInt32 i = 0; i < count; i ++) {
			tmp[i][0] = dgFloat32 (buffer[i*3 + 0]);
			tmp[i][1] = dgFloat32 (buffer[i*3 + 1]);
			tmp[i][2] = dgFloat32 (buffer[i*3 + 2]);
			// clear the w component; the z value stored above must be kept,
			// otherwise every point is projected onto the z = 0 plane before
			// the bounding box is fitted
			tmp[i].m_w = dgFloat32 (0.0f);
		}
	
		dgObb sphere;
		sphere.SetDimensions (&tmp[0][0], sizeof (dgVector), count);

		// pick the box axis with the smallest extent
		dgInt32 index = 0;
		dgFloat32 size = dgFloat32 (1.0e10f);
		for (dgInt32 i = 0; i < 3; i ++) {
			if (sphere.m_size[i] < size) {
				index = i;
				size = sphere.m_size[i];
			}
		}
		dgVector normal (dgFloat32 (0.0f));
		normal[index] = dgFloat32 (1.0f);
		dgVector step = sphere.RotateVector (normal.Scale (dgFloat32 (0.05f)));

		// emit every point twice, displaced by +step and -step
		for (dgInt32 i = 0; i < count; i ++) {
			dgVector p1 (tmp[i] + step);
			dgVector p2 (tmp[i] - step);

			buffer[i * 3 + 0] = p1.m_x;
			buffer[i * 3 + 1] = p1.m_y;
			buffer[i * 3 + 2] = p1.m_z;
			buffer[(i + count) * 3 + 0] = p2.m_x;
			buffer[(i + count) * 3 + 1] = p2.m_y;
			buffer[(i + count) * 3 + 2] = p2.m_z;
		}
		count *= 2;
		convexHull =  new (GetAllocator()) dgConvexHull3d (GetAllocator(), &buffer[0], 3 * sizeof (dgFloat64), count, tolerance);
		if (!convexHull->GetCount()) {
			delete convexHull;
			return false;
		}
	}

	// check for degenerated faces; every pass that finds one removes at least one
	// vertex and rebuilds the hull, so the loop terminates
	for (bool success = false; !success;  ) {
		success = true;
		const dgBigVector* const hullVertexArray = convexHull->GetVertexPool();

		// mask[i] == 1 means hull vertex i is kept
		dgStack<dgInt8> mask(convexHull->GetVertexCount());
		memset (&mask[0], 1, mask.GetSizeInBytes());
		for (dgConvexHull3d::dgListNode* node = convexHull->GetFirst(); node; node = node->GetNext()) {
			dgConvexHull3DFace& face = node->GetInfo();
			const dgBigVector& p0 = hullVertexArray[face.m_index[0]];
			const dgBigVector& p1 = hullVertexArray[face.m_index[1]];
			const dgBigVector& p2 = hullVertexArray[face.m_index[2]];
			dgAssert(p0.m_w == p1.m_w);
			dgAssert(p0.m_w == p2.m_w);
			dgBigVector p1p0 (p1 - p0);
			dgBigVector p2p0 (p2 - p0);
			dgBigVector normal (p2p0.CrossProduct(p1p0));
			dgFloat64 mag2 = normal.DotProduct(normal).GetScalar();
			if (mag2 < dgFloat64 (1.0e-6f * 1.0e-6f)) {
				// near zero-area triangle: drop the vertex opposite its longest edge
				success = false;
				dgInt32 index = -1;
				dgBigVector p2p1 (p2 - p1);
				dgFloat64 dist10 = p1p0.DotProduct(p1p0).GetScalar();
				dgFloat64 dist20 = p2p0.DotProduct(p2p0).GetScalar();
				dgFloat64 dist21 = p2p1.DotProduct(p2p1).GetScalar();
				if ((dist10 >= dist20) && (dist10 >= dist21)) {
					index = 2;
				} else if ((dist20 >= dist10) && (dist20 >= dist21)) {
					index = 1;
				} else if ((dist21 >= dist10) && (dist21 >= dist20)) {
					index = 0;
				}
				dgAssert (index != -1);
				mask[face.m_index[index]] = 0;
			}
		}
		if (!success) {
			// compact the surviving vertices into buffer and rebuild the hull
			dgInt32 count1 = 0;
			dgInt32 vertexCount = convexHull->GetVertexCount();
			for (dgInt32 i = 0; i < vertexCount; i ++) {
				if (mask[i]) {
					buffer[count1 * 3 + 0] = hullVertexArray[i].m_x;
					buffer[count1 * 3 + 1] = hullVertexArray[i].m_y;
					buffer[count1 * 3 + 2] = hullVertexArray[i].m_z;
					count1 ++;
				}
			}
			delete convexHull;
			convexHull =  new (GetAllocator()) dgConvexHull3d (GetAllocator(), &buffer[0], 3 * sizeof (dgFloat64), count1, tolerance);
		}
	}

	dgAssert (convexHull);
	dgInt32 vertexCount = convexHull->GetVertexCount();
	if (vertexCount < 4) {
		delete convexHull;
		return false;
	}
	

	const dgBigVector* const hullVertexArray = convexHull->GetVertexPool();

	// convert the hull triangles into an edge based polyhedra
	dgPolyhedra polyhedra (GetAllocator());
	polyhedra.BeginFace();
	for (dgConvexHull3d::dgListNode* node = convexHull->GetFirst(); node; node = node->GetNext()) {
		dgConvexHull3DFace& face = node->GetInfo();
		polyhedra.AddFace (face.m_index[0], face.m_index[1], face.m_index[2]);
	}
	polyhedra.EndFace();

	if (vertexCount > 4) {
//		bool edgeRemoved = false;
//		while (RemoveCoplanarEdge (polyhedra, hullVertexArray)) {
//			edgeRemoved = true;
//		}
//		if (edgeRemoved) {
//			if (!CheckConvex (polyhedra, hullVertexArray)) {
//				delete convexHull;
//				return false;
//			}
//		}
		while (RemoveCoplanarEdge (polyhedra, hullVertexArray));
	}

	// vertexMap[hull index] -> compact index into m_vertex, -1 while unused
	dgStack<dgInt32> vertexMap(vertexCount);
	memset (&vertexMap[0], -1, vertexCount * sizeof (dgInt32));

	// first pass: number the vertices still referenced by an edge and give every
	// edge its slot in m_simplex (stored in m_userData)
	dgInt32 mark = polyhedra.IncLRU();
	dgPolyhedra::Iterator iter (polyhedra);
	for (iter.Begin(); iter; iter ++) {
		dgEdge* const edge = &iter.GetNode()->GetInfo();
		if (edge->m_mark != mark) {
			if (vertexMap[edge->m_incidentVertex] == -1) {
				vertexMap[edge->m_incidentVertex] = m_vertexCount;
				m_vertexCount ++;
			}
			// walk all the edges that leave the same vertex
			dgEdge* ptr = edge;
			do {
				ptr->m_mark = mark;
				ptr->m_userData = m_edgeCount;
				m_edgeCount ++;
				ptr = ptr->m_twin->m_next;
			} while (ptr != edge) ;
		}
	} 

	m_vertex = (dgVector*) m_allocator->Malloc (dgInt32 (m_vertexCount * sizeof (dgVector)));
	m_simplex = (dgConvexSimplexEdge*) m_allocator->Malloc (dgInt32 (m_edgeCount * sizeof (dgConvexSimplexEdge)));
	m_vertexToEdgeMapping = (const dgConvexSimplexEdge**) m_allocator->Malloc (dgInt32 (m_vertexCount * sizeof (dgConvexSimplexEdge*)));

	for (dgInt32 i = 0; i < vertexCount; i ++) {
		if (vertexMap[i] != -1) {
			m_vertex[vertexMap[i]] = hullVertexArray[i];
			m_vertex[vertexMap[i]].m_w = dgFloat32 (0.0f);
		}
	}
	// hullVertexArray points into convexHull and must not be used past this line
	delete convexHull;

	// second pass: link the simplex edges to each other using the slots assigned above
	vertexCount = m_vertexCount;
	mark = polyhedra.IncLRU();
	for (iter.Begin(); iter; iter ++) {
		dgEdge* const edge = &iter.GetNode()->GetInfo();
		if (edge->m_mark != mark) {
			dgEdge *ptr = edge;
			do {
				ptr->m_mark = mark;
				dgConvexSimplexEdge* const simplexPtr = &m_simplex[ptr->m_userData];
				simplexPtr->m_vertex = vertexMap[ptr->m_incidentVertex];
				simplexPtr->m_next = &m_simplex[ptr->m_next->m_userData];
				simplexPtr->m_prev = &m_simplex[ptr->m_prev->m_userData];
				simplexPtr->m_twin = &m_simplex[ptr->m_twin->m_userData];

				ptr = ptr->m_twin->m_next;
			} while (ptr != edge) ;
		}
	} 

	
	// collect one representative edge per face by following the m_next loops
	m_faceCount = 0;
	dgStack<char> faceMarks (m_edgeCount);
	memset (&faceMarks[0], 0, m_edgeCount * sizeof (dgInt8));

	dgStack<dgConvexSimplexEdge*> faceArray (m_edgeCount);
	for (dgInt32 i = 0; i < m_edgeCount; i ++) {
		dgConvexSimplexEdge* const face = &m_simplex[i];
		if (!faceMarks[i]) {
			dgConvexSimplexEdge* ptr = face;
			do {
				dgAssert ((ptr - m_simplex) >= 0);
				faceMarks[dgInt32 (ptr - m_simplex)] = '1';
				ptr = ptr->m_next;
			} while (ptr != face);

			faceArray[m_faceCount] = face;
			m_faceCount ++;
		}
	}
	m_faceArray = (dgConvexSimplexEdge **) m_allocator->Malloc(dgInt32 (m_faceCount * sizeof(dgConvexSimplexEdge *)));
	memcpy (m_faceArray, &faceArray[0], m_faceCount * sizeof(dgConvexSimplexEdge *));
	
	if (vertexCount > DG_CONVEX_VERTEX_CHUNK_SIZE) {
		// create a face structure for support vertex
		dgStack<dgConvexBox> boxTree (vertexCount);
		// sortTree maps the current vertex index to a copy of its position;
		// vertexNodeList is the permutation that gets partitioned below
		dgTree<dgVector,dgInt32> sortTree(GetAllocator());
		dgStack<dgTree<dgVector,dgInt32>::dgTreeNode*> vertexNodeList(vertexCount);

		dgVector boxP0 ( dgFloat32 (1.0e15f)); 
		dgVector boxP1 (-dgFloat32 (1.0e15f));
		for (dgInt32 i = 0; i < vertexCount; i ++) {
			const dgVector& p = m_vertex[i];
			vertexNodeList[i] = sortTree.Insert (p, i);
			boxP0 = boxP0.GetMin(p);
			boxP1 = boxP1.GetMax(p);
		}

		// root box covers every vertex
		boxTree[0].m_box[0] = boxP0 & dgVector::m_triplexMask;
		boxTree[0].m_box[1] = boxP1 & dgVector::m_triplexMask;
		boxTree[0].m_leftBox = -1;
		boxTree[0].m_rightBox = -1;
		boxTree[0].m_vertexStart = 0;
		boxTree[0].m_vertexCount = vertexCount;
		dgInt32 boxCount = 1;

		dgInt32 stack = 1;
		dgInt32 stackBoxPool[64];
		stackBoxPool[0] = 0;

		while (stack) {
			stack --;
			dgInt32 boxIndex = stackBoxPool[stack];
			dgConvexBox& box = boxTree[boxIndex];
			if (box.m_vertexCount > DG_CONVEX_VERTEX_CHUNK_SIZE) {
				// accumulate sum and sum of squares to find the axis of largest spread
				dgVector median (dgFloat32 (0.0f));
				dgVector varian (dgFloat32 (0.0f));
				for (dgInt32 i = 0; i < box.m_vertexCount; i ++) {
					dgVector& p = vertexNodeList[box.m_vertexStart + i]->GetInfo();
					median += p;
					varian += p * p;
				}

				// n * sum(p^2) - sum(p)^2, computed per component
				varian = varian.Scale (dgFloat32 (box.m_vertexCount)) - median * median;
				dgInt32 index = 0;
				dgFloat64 maxVarian = dgFloat64 (-1.0e10f);
				for (dgInt32 i = 0; i < 3; i ++) {
					if (varian[i] > maxVarian) {
						index = i;
						maxVarian = varian[i];
					}
				}
				dgVector center = median.Scale (dgFloat32 (1.0f) / dgFloat32 (box.m_vertexCount));
				dgFloat32 test = center[index];

				// partition the vertices of this box around the mean on the chosen axis
				dgInt32 i0 = 0;
				dgInt32 i1 = box.m_vertexCount - 1;
				do {    
					for (; i0 <= i1; i0 ++) {
						dgFloat32 val = vertexNodeList[box.m_vertexStart + i0]->GetInfo()[index];
						if (val > test) {
							break;
						}
					}

					for (; i1 >= i0; i1 --) {
						dgFloat32 val = vertexNodeList[box.m_vertexStart + i1]->GetInfo()[index];
						if (val < test) {
							break;
						}
					}

					if (i0 < i1)	{
						dgSwap(vertexNodeList[box.m_vertexStart + i0], vertexNodeList[box.m_vertexStart + i1]);
						i0++; 
						i1--;
					}
				} while (i0 <= i1);

				// fall back to an even split when the partition is (almost) one sided
				if (i0 == 0){
					i0 = box.m_vertexCount / 2;
				}
				if (i0 >= (box.m_vertexCount - 1)){
					i0 = box.m_vertexCount / 2;
				}

				// two children are pushed below; make sure the fixed-size
				// traversal stack can hold them
				dgAssert ((stack + 2) <= dgInt32 (sizeof (stackBoxPool) / sizeof (stackBoxPool[0])));

				{
					// insert right branch AABB
					dgVector rightBoxP0 ( dgFloat32 (1.0e15f)); 
					dgVector rightBoxP1 (-dgFloat32 (1.0e15f)); 	
					for (dgInt32 i = i0; i < box.m_vertexCount; i ++) {
						const dgVector& p = vertexNodeList[box.m_vertexStart + i]->GetInfo();
						rightBoxP0 = rightBoxP0.GetMin(p);
						rightBoxP1 = rightBoxP1.GetMax(p);
					}

					box.m_rightBox = boxCount;
					boxTree[boxCount].m_box[0] = rightBoxP0 & dgVector::m_triplexMask;
					boxTree[boxCount].m_box[1] = rightBoxP1 & dgVector::m_triplexMask;
					boxTree[boxCount].m_leftBox = -1;
					boxTree[boxCount].m_rightBox = -1;
					boxTree[boxCount].m_vertexStart = box.m_vertexStart + i0;
					boxTree[boxCount].m_vertexCount = box.m_vertexCount - i0;
					stackBoxPool[stack] = boxCount;
					stack ++;
					boxCount ++;
				}

				{
					// insert left branch AABB
					dgVector leftBoxP0 ( dgFloat32 (1.0e15f));
					dgVector leftBoxP1 (-dgFloat32 (1.0e15f));
					for (dgInt32 i = 0; i < i0; i ++) {
						const dgVector& p = vertexNodeList[box.m_vertexStart + i]->GetInfo();
						leftBoxP0 = leftBoxP0.GetMin(p);
						leftBoxP1 = leftBoxP1.GetMax(p);
					}

					box.m_leftBox = boxCount;
					boxTree[boxCount].m_box[0] = leftBoxP0 & dgVector::m_triplexMask;
					boxTree[boxCount].m_box[1] = leftBoxP1 & dgVector::m_triplexMask;
					boxTree[boxCount].m_leftBox = -1;
					boxTree[boxCount].m_rightBox = -1;
					boxTree[boxCount].m_vertexStart = box.m_vertexStart;
					boxTree[boxCount].m_vertexCount = i0;
					stackBoxPool[stack] = boxCount;
					stack ++;
					boxCount ++;
				}
			}
		}

		// store the vertices in partition order and record each vertex's new
		// position in the w component of its tree node
		for (dgInt32 i = 0; i < m_vertexCount; i ++) {
			m_vertex[i] = vertexNodeList[i]->GetInfo();
			vertexNodeList[i]->GetInfo().m_w = dgFloat32 (i);
		}

		m_supportTreeCount = boxCount;
		m_supportTree = (dgConvexBox*) m_allocator->Malloc(dgInt32 (boxCount * sizeof(dgConvexBox)));		
		memcpy (m_supportTree, &boxTree[0], boxCount * sizeof(dgConvexBox));

		// remap every edge from its old vertex index to the reordered one
		for (dgInt32 i = 0; i < m_edgeCount; i ++) {
			dgConvexSimplexEdge* const ptr = &m_simplex[i];
			dgTree<dgVector,dgInt32>::dgTreeNode* const node = sortTree.Find(ptr->m_vertex);
			dgAssert (node);
			dgInt32 index = dgInt32 (node->GetInfo().m_w);
			ptr->m_vertex = dgInt16 (index);
		}
	}

	// remember one edge per vertex (the last edge visited for that vertex wins)
	for (dgInt32 i = 0; i < m_edgeCount; i ++) {
		dgConvexSimplexEdge* const edge = &m_simplex[i];
		m_vertexToEdgeMapping[edge->m_vertex] = edge;
	}


	SetVolumeAndCG ();
	return true;
}