void CompleteLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const
  {
    // attention: clustering process is done by clustering the indices
    // pointing to elements in inputvector and distances in inputmatrix

    // input MUST have >= 2 elements!
    if (original_distance.dimensionsize() < 2)
    {
      throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element");
    }

    std::vector<std::set<Size> > clusters(original_distance.dimensionsize());
    for (Size i = 0; i < original_distance.dimensionsize(); ++i)
    {
      clusters[i].insert(i);
    }

    cluster_tree.clear();
    cluster_tree.reserve(original_distance.dimensionsize() - 1);

    // Initial minimum-distance pair
    original_distance.updateMinElement();
    std::pair<Size, Size> min = original_distance.getMinElementCoordinates();

    Size overall_cluster_steps(original_distance.dimensionsize());
    startProgress(0, original_distance.dimensionsize(), "clustering data");

    while (original_distance(min.first, min.second) < threshold)
    {
      //grow the tree
      cluster_tree.push_back(BinaryTreeNode(*(clusters[min.second].begin()), *(clusters[min.first].begin()), original_distance(min.first, min.second)));
      if (cluster_tree.back().left_child > cluster_tree.back().right_child)
      {
        std::swap(cluster_tree.back().left_child, cluster_tree.back().right_child);
      }

      if (original_distance.dimensionsize() > 2)
      {
        //pick minimum-distance pair i,j and merge them

        //pushback elements of second to first (and then erase second)
        clusters[min.second].insert(clusters[min.first].begin(), clusters[min.first].end());
        // erase first one
        clusters.erase(clusters.begin() + min.first);

        //update original_distance matrix
        //complete linkage: new distance between clusters is the minimum distance between elements of each cluster
        //lance-williams update for d((i,j),k): 0.5* d(i,k) + 0.5* d(j,k) + 0.5* |d(i,k)-d(j,k)|
        for (Size k = 0; k < min.second; ++k)
        {
          float dik = original_distance.getValue(min.first, k);
          float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(min.second, k, (0.5f * dik + 0.5f * djk + 0.5f * std::fabs(dik - djk)));
        }
        for (Size k = min.second + 1; k < original_distance.dimensionsize(); ++k)
        {
          float dik = original_distance.getValue(min.first, k);
          float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(k, min.second, (0.5f * dik + 0.5f * djk + 0.5f * std::fabs(dik - djk)));
        }

        //reduce
        original_distance.reduce(min.first);

        //update minimum-distance pair
        original_distance.updateMinElement();

        //get new min-pair
        min = original_distance.getMinElementCoordinates();
      }
      else
      {
        break;
      }
      setProgress(overall_cluster_steps - original_distance.dimensionsize());

      //repeat until only two cluster remains or threshold exceeded, last step skips matrix operations
    }
    //fill tree with dummy nodes
    Size sad(*clusters.front().begin());
    for (Size i = 1; i < clusters.size() && (cluster_tree.size() < cluster_tree.capacity()); ++i)
    {
      cluster_tree.push_back(BinaryTreeNode(sad, *clusters[i].begin(), -1.0));
    }
    //~ while(cluster_tree.size() < cluster_tree.capacity())
    //~ {
    //~ cluster_tree.push_back(BinaryTreeNode(0,1,-1.0));
    //~ }

    endProgress();
  }
Beispiel #2
0
  void AverageLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const
  {
    // input MUST have >= 2 elements!
    if (original_distance.dimensionsize() < 2)
    {
      throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element");
    }

    std::vector<std::set<Size> > clusters(original_distance.dimensionsize());
    for (Size i = 0; i < original_distance.dimensionsize(); ++i)
    {
      clusters[i].insert(i);
    }

    cluster_tree.clear();
    cluster_tree.reserve(original_distance.dimensionsize() - 1);

    // Initial minimum-distance pair
    original_distance.updateMinElement();
    std::pair<Size, Size> min = original_distance.getMinElementCoordinates();

    Size overall_cluster_steps(original_distance.dimensionsize());
    startProgress(0, original_distance.dimensionsize(), "clustering data");

    while (original_distance(min.second, min.first) < threshold)
    {
      //grow the tree
      cluster_tree.push_back(BinaryTreeNode(*(clusters[min.second].begin()), *(clusters[min.first].begin()), original_distance(min.first, min.second)));
      if (cluster_tree.back().left_child > cluster_tree.back().right_child)
      {
        std::swap(cluster_tree.back().left_child, cluster_tree.back().right_child);
      }

      if (original_distance.dimensionsize() > 2)
      {
        //pick minimum-distance pair i,j and merge them

        //calculate parameter for lance-williams formula
        float alpha_i = (float)(clusters[min.first].size() / (float)(clusters[min.first].size() + clusters[min.second].size()));
        float alpha_j = (float)(clusters[min.second].size() / (float)(clusters[min.first].size() + clusters[min.second].size()));
        //~ std::cout << alpha_i << '\t' << alpha_j << std::endl;

        //pushback elements of second to first (and then erase second)
        clusters[min.second].insert(clusters[min.first].begin(), clusters[min.first].end());
        // erase first one
        clusters.erase(clusters.begin() + min.first);

        //update original_distance matrix
        //average linkage: new distance between clusters is the minimum distance between elements of each cluster
        //lance-williams update for d((i,j),k): (m_i/m_i+m_j)* d(i,k) + (m_j/m_i+m_j)* d(j,k) ; m_x is the number of elements in cluster x
        for (Size k = 0; k < min.second; ++k)
        {
          float dik = original_distance.getValue(min.first, k);
          float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(min.second, k, (alpha_i * dik + alpha_j * djk));
        }
        for (Size k = min.second + 1; k < original_distance.dimensionsize(); ++k)
        {
          float dik = original_distance.getValue(min.first, k);
          float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(k, min.second, (alpha_i * dik + alpha_j * djk));
        }

        //reduce
        original_distance.reduce(min.first);

        //update minimum-distance pair
        original_distance.updateMinElement();

        //get min-pair from triangular matrix
        min = original_distance.getMinElementCoordinates();
      }
      else
      {
        break;
      }
      setProgress(overall_cluster_steps - original_distance.dimensionsize());

      //repeat until only two cluster remains, last step skips matrix operations
    }
    //fill tree with dummy nodes
    Size sad(*clusters.front().begin());
    for (Size i = 1; (i < clusters.size()) && (cluster_tree.size() < cluster_tree.capacity()); ++i)
    {
      cluster_tree.push_back(BinaryTreeNode(sad, *clusters[i].begin(), -1.0));
    }

    endProgress();
  }