void generate(size_t start, size_t end) const
    {
      float* nodes = m_Nodes->getPointer(0);
      int64_t* triangles = m_Triangles->getPointer(0);

      for (size_t i = start; i < end; i++)
      {
        m_Centroids[i * 3]  = (nodes[triangles[i * 3] * 3 + 0] + nodes[triangles[i * 3 + 1] * 3 + 0] + nodes[triangles[i * 3 + 2] * 3 + 0]) / 3.0;
        m_Centroids[i * 3 + 1] = (nodes[triangles[i * 3] * 3 + 1] + nodes[triangles[i * 3 + 1] * 3 + 1] + nodes[triangles[i * 3 + 2] * 3 + 1]) / 3.0;
        m_Centroids[i * 3 + 2]  = (nodes[triangles[i * 3] * 3 + 2] + nodes[triangles[i * 3 + 1] * 3 + 2] + nodes[triangles[i * 3 + 2] * 3 + 2]) / 3.0;
      }
    }
    void generate(size_t start, size_t end) const
    {
        int64_t* triangles = m_Triangles->getPointer(0);

        for (size_t i = start; i < end; i++)
        {
            // Swap the indices
            int64_t nId0 = triangles[i * 3 + 0];
            int64_t nId2 = triangles[i * 3 + 2];

            triangles[i * 3 + 0] = nId2;
            triangles[i * 3 + 2] = nId0;
        }
    }
    void generate(size_t start, size_t end) const
    {
      float* nodes = m_Nodes->getPointer(0);
      int64_t* triangles = m_Triangles->getPointer(0);
      int64_t nIdx0 = 0, nIdx1 = 0, nIdx2 = 0;
      for (size_t i = start; i < end; i++)
      {
        nIdx0 = triangles[i * 3] * 3;
        nIdx1 = triangles[i * 3 + 1] * 3;
        nIdx2 = triangles[i * 3 + 2] * 3;
        float* n0 = &(nodes[nIdx0]);
        float* n1 = &(nodes[nIdx1]);
        float* n2 = &(nodes[nIdx2]);

        VectorType normal = TriangleOps::computeNormal(n0, n1, n2);
        m_Normals[i * 3 + 0] = normal.x;
        m_Normals[i * 3 + 1] = normal.y;
        m_Normals[i * 3 + 2] = normal.z;
      }
    }
    void generate(size_t start, size_t end) const
    {
      int64_t* triangles = m_Triangles->getPointer(0);
      int64_t nIdx0 = 0, nIdx1 = 0, nIdx2 = 0;
      float vecA[3] = { 0.0f, 0.0f, 0.0f };
      float vecB[3] = { 0.0f, 0.0f, 0.0f };
      float cross[3] = { 0.0f, 0.0f, 0.0f };
      for (size_t i = start; i < end; i++)
      {
        nIdx0 = triangles[i * 3];
        nIdx1 = triangles[i * 3 + 1];
        nIdx2 = triangles[i * 3 + 2];
        float* A = m_Nodes->getPointer(nIdx0 * 3);
        float* B = m_Nodes->getPointer(nIdx1 * 3);
        float* C = m_Nodes->getPointer(nIdx2 * 3);

        MatrixMath::Subtract3x1s(A, B, vecA);
        MatrixMath::Subtract3x1s(A, C, vecB);
        MatrixMath::CrossProduct(vecA, vecB, cross);
        float area = 0.5f * MatrixMath::Magnitude3x1(cross);
        m_Areas[i] = area;
      }
    }