// Find a 1:1 mapping from rows to columns of the specified square matrix such that the total cost is minimized. Returns a
// vector V such that V[i] = j maps rows i to columns j.
static std::vector<long>
findMinimumAssignment(const DistanceMatrix &matrix) {
#ifdef ROSE_HAVE_DLIB
    ASSERT_forbid(matrix.size() == 0);
    ASSERT_require(matrix.nr() == matrix.nc());

    // We can avoid the O(n^3) Kuhn-Munkres algorithm if all values of the matrix are the same.
    double minValue, maxValue;
    dlib::find_min_and_max(matrix, minValue /*out*/, maxValue /*out*/);
    if (minValue == maxValue) {
        std::vector<long> ident;
        ident.reserve(matrix.nr());
        for (long i=0; i<matrix.nr(); ++i)
            ident.push_back(i);
        return ident;
    }

    // Dlib's Kuhn-Munkres finds the *maximum* mapping over *integers*, so we negate everything to find the minumum, and we map
    // the doubles onto a reasonably large interval of integers. The interval should be large enough to have some precision,
    // but not so large that things might overflow.
    const int iGreatest = 1000000;                      // arbitrary upper bound for integer interval
    dlib::matrix<long> intMatrix(matrix.nr(), matrix.nc());
    for (long i=0; i<matrix.nr(); ++i) {
        for (long j=0; j<matrix.nc(); ++j)
            intMatrix(i, j) = round(-iGreatest * (matrix(i, j) - minValue) / (maxValue - minValue));
    }
    return dlib::max_cost_assignment(intMatrix);
#else
    throw FunctionSimilarity::Exception("dlib support is necessary for FunctionSimilarity analysis"
                                        "; see ROSE installation instructions");
#endif
}
    void testUpdateDistanceMatrix() 
	{

        PeakListCollection PLC = SamplePeakListCollection();

        DistanceMatrix DM = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.);
        DistanceMatrix Ref = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.);

        DM.addElement(99.); //Add a dummy element, we will delete it again

        unsigned int merged_lower = 4; //last two elements
        unsigned int merged_upper = 5;

        PLC.plContent_.clear();
        PLC.plContent_.resize(6,1);

        PLC.updateDistanceMatrix_(DM, merged_lower, merged_upper, 200., 0.5, 0, 0.);

        //Size should still be 5
        shouldEqual((int)DM.size(),5);

        //Matrix should be the same as the original matrix Ref
        for(unsigned int i = 0; i < 5; i++){
            for(unsigned int j = 0; j < i; j++){
                shouldEqualTolerance(DM(i,j),Ref(i,j),TOL);
            }
        }

        return;
    }
Пример #3
0
    DBL_MATRIX PeakListCollection::mergeAll(double drt, double dmz, double dz, double dint)
    {
        //preprocessing: delete empty PeakLists from c_
        for(unsigned int i = 0; i < c_.size(); i++){
            if(c_[i].size() == 0){
                c_.erase(c_.begin() + i);
            }
        }

        unsigned int oldSize = c_.size();

        // initialization

        //fill correspondenceMap with rt-values
        correspondenceMap_.clear();
        correspondenceMap_.resize(oldSize);

        for(unsigned int i = 0; i < oldSize; i++){
            correspondenceMap_[i].resize(c_[i].size());
            for(unsigned int j = 0; j < c_[i].size(); j++){
                //create map_item to write to correspondenceMap
                map_item tempItem;
                //origin information contains the PeakList index and rt value
                originInformation o;
                o.rt = c_[i][j].getRt();
                o.mz = c_[i][j].getMz();
                o.intensity = c_[i][j].getAbundance();
                o.originPeakList = i;
                o.originPeak = j;
                tempItem.push_back( o );
                correspondenceMap_[i][j] = tempItem;
            }
        }

        //PeakLists are not merged in the beginning --> fill with 1
        plContent_.clear();
        plContent_.resize(oldSize,1);

        //build distance matrix
        DistanceMatrix D = buildDistanceMatrix_(drt,dmz,dz,dint);

        //merge, until there is only one PeakList left

        while(D.size() > 1){
            //find cheapest assignment
            unsigned int merge_lower = D.getMin_LowerIndex();
            unsigned int merge_upper = D.getMin_HigherIndex();

            //merge peaklists into a new one
            mergePeakLists_(merge_lower, merge_upper, c_, drt, dmz, dz, dint);

            //update distance matrix
            updateDistanceMatrix_(D, merge_lower, merge_upper, drt, dmz, dz, dint);
        }

        return calculateRtCorrespondencesFromCorrespondenceMap_(oldSize);

    }
    void testBuildDistanceMatrix() 
	{

        PeakListCollection PLC = SamplePeakListCollection();

        DistanceMatrix DM = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.);

        //PeakList 4 should have no Peaks in common with the other lists --> Cost == DBL_MAX
        for(unsigned int i = 0; i < DM.size()-1; i++){
            shouldEqual(DM(i,4),DBL_MAX);
        }

        //distance PL0 - PL2 should be sqrt(0.1^2/0.5^2 + 30^2/200^2) = 0.25
        shouldEqualTolerance(DM(0,2),0.25,TOL);
        shouldEqualTolerance(DM(2,0),0.25,TOL);

        return;
    }
Пример #5
0
    //Update the Distance Matrix
    void PeakListCollection::updateDistanceMatrix_(DistanceMatrix& D, unsigned int merged_lower, unsigned int merged_upper,
                                                   double drt, double dmz, double dz, double dint = 0.)
    {

        //remove distances of merged PeakLists from distance matrix
        D.deleteElement(merged_upper);
        D.deleteElement(merged_lower);

        //add a new column for the merged PeakList
        D.addElement();

        //fill distances
        unsigned int sz = D.size();
        for(unsigned int i = 0; i<sz-1; i++){
            StableMarriage sm(c_[i],c_[sz-1],drt,dmz,dz,dint);
            sm.setLimit(1.);
            D(i,sz-1) = sm.getCost();
        }

        return;
    }
Пример #6
0
    DBL_MATRIX PeakListCollection::getAlignment(double drt, double dmz, double dz, double dint, AccelerationFlag flag = NORMAL, int param = 0)
    {

        //Create a copy of c_ to work on
        std::vector<PeakList> pls = c_;

        unsigned int oldSize = pls.size();

        // initialization

        //fill correspondenceMap with rt-values
        correspondenceMap_.clear();
        correspondenceMap_.resize(oldSize);

        for(unsigned int i = 0; i < oldSize; i++){
            correspondenceMap_[i].resize(pls[i].size());
            for(unsigned int j = 0; j < pls[i].size(); j++){
                //create map_item to write to correspondenceMap
                map_item tempItem;
                //origin information contains the PeakList index and rt value
                originInformation o;
                o.rt = c_[i][j].getRt();
                o.mz = c_[i][j].getMz();
                o.intensity = c_[i][j].getAbundance();
                o.originPeakList = i;
                o.originPeak = j;
                tempItem.push_back( o );
                correspondenceMap_[i][j] = tempItem;
            }
        }

        //PeakLists are not merged in the beginning --> fill with 1
        plContent_.clear();
        plContent_.resize(oldSize,1);

        DistanceMatrix D;
        if(flag == NORMAL){
            //build distance matrix
            D = buildDistanceMatrix_(drt,dmz,dz,dint);
        }

        if(flag == REFERENCE){
            //first, merge PeakList n with one other --> last PeakList contains PeakList n
            if(param==0){
                //select first two peaklists
                unsigned int merge_lower = param;
                unsigned int merge_upper = 1;

                //merge peaklists into a new one
                mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint);
            }else{
                //select first and n'th PeakList
                unsigned int merge_lower = 0;
                unsigned int merge_upper = param;

                //merge peaklists into a new one
                mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint);
            }
        }

        //merge, until there is only one PeakList left
        bool exit = false;
        while(!exit){
            //find cheapest assignment
            unsigned int merge_lower = 0;
            unsigned int merge_upper = 1;
            switch(flag){
            case NORMAL:
                merge_lower = D.getMin_LowerIndex();
                merge_upper = D.getMin_HigherIndex();
                break;

            case FAST:
                //select last two PeakList
                merge_lower = pls.size()-2;
                merge_upper = pls.size()-1;
                break;

            case REFERENCE:
                //select last two PeakList
                merge_lower = pls.size()-2;
                merge_upper = pls.size()-1;
                break;
            }

            //merge peaklists into a new one
            mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint);


            switch(flag){
            case NORMAL:
                //update distance matrix
                updateDistanceMatrix_(D, merge_lower, merge_upper, drt, dmz, dz, dint);
                if(D.size()==1){
                    exit = true;
                }
                break;

            case FAST:
                if(pls.size()==1){
                    exit = true;
                }
                break;

            case REFERENCE:
                if(pls.size()==1){
                    exit = true;
                }
                break;
            }
        }

        DBL_MATRIX rtc = calculateRtCorrespondencesFromCorrespondenceMap_(oldSize);

        return rtc;


    }
Пример #7
0
TreeTemplate<Node>* OptimizationTools::buildDistanceTree(
  DistanceEstimation& estimationMethod,
  AgglomerativeDistanceMethod& reconstructionMethod,
  const ParameterList& parametersToIgnore,
  bool optimizeBrLen,
  const std::string& param,
  double tolerance,
  unsigned int tlEvalMax,
  OutputStream* profiler,
  OutputStream* messenger,
  unsigned int verbose) throw (Exception)
{
  estimationMethod.resetAdditionalParameters();
  estimationMethod.setVerbose(verbose);
  if (param == DISTANCEMETHOD_PAIRWISE)
  {
    ParameterList tmp = estimationMethod.getSubstitutionModel().getIndependentParameters();
    tmp.addParameters(estimationMethod.getRateDistribution().getIndependentParameters());
    tmp.deleteParameters(parametersToIgnore.getParameterNames());
    estimationMethod.setAdditionalParameters(tmp);
  }
  TreeTemplate<Node>* tree = NULL;
  TreeTemplate<Node>* previousTree = NULL;
  bool test = true;
  while (test)
  {
    // Compute matrice:
    if (verbose > 0)
      ApplicationTools::displayTask("Estimating distance matrix", true);
    estimationMethod.computeMatrix();
    DistanceMatrix* matrix = estimationMethod.getMatrix();
    if (verbose > 0)
      ApplicationTools::displayTaskDone();

    // Compute tree:
    if (matrix->size() == 2) {
      //Special case, there is only one possible tree:
      Node* n1 = new Node(0);
      Node* n2 = new Node(1, matrix->getName(0));
      n2->setDistanceToFather((*matrix)(0,0) / 2.);
      Node* n3 = new Node(2, matrix->getName(1));
      n3->setDistanceToFather((*matrix)(0,0) / 2.);
      n1->addSon(n2);
      n1->addSon(n3);
      tree = new TreeTemplate<Node>(n1);
      break;
    }
    if (verbose > 0)
      ApplicationTools::displayTask("Building tree");
    reconstructionMethod.setDistanceMatrix(*matrix);
    reconstructionMethod.computeTree();
    previousTree = tree;
    delete matrix;
    tree = dynamic_cast<TreeTemplate<Node>*>(reconstructionMethod.getTree());
    if (verbose > 0)
      ApplicationTools::displayTaskDone();
    if (previousTree && verbose > 0)
    {
      int rf = TreeTools::robinsonFouldsDistance(*previousTree, *tree, false);
      ApplicationTools::displayResult("Topo. distance with previous iteration", TextTools::toString(rf));
      test = (rf == 0);
      delete previousTree;
    }
    if (param != DISTANCEMETHOD_ITERATIONS)
      break;  // Ends here.

    // Now, re-estimate parameters:
    auto_ptr<SubstitutionModel> model(estimationMethod.getSubstitutionModel().clone());
    auto_ptr<DiscreteDistribution> rdist(estimationMethod.getRateDistribution().clone());
    DRHomogeneousTreeLikelihood tl(*tree,
        *estimationMethod.getData(),
        model.get(),
        rdist.get(),
        true, verbose > 1);
    tl.initialize();
    ParameterList parameters = tl.getParameters();
    if (!optimizeBrLen)
    {
      vector<string> vs = tl.getBranchLengthsParameters().getParameterNames();
      parameters.deleteParameters(vs);
    }
    parameters.deleteParameters(parametersToIgnore.getParameterNames());
    optimizeNumericalParameters(&tl, parameters, NULL, 0, tolerance, tlEvalMax, messenger, profiler, verbose > 0 ? verbose - 1 : 0);
    if (verbose > 0)
    {
      ParameterList tmp = tl.getSubstitutionModelParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
      tmp = tl.getRateDistributionParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
    }
  }
  return tree;
}
Пример #8
0
string Alignment::_computeTree(DistanceMatrix dists, DistanceMatrix vars) throw (Exception) {
    // Initialization:
    std::map<size_t, Node*> currentNodes_;
    std::vector<double> sumDist_(dists.size());
    double lambda_;

    for (size_t i = 0; i < dists.size(); i++) {
        currentNodes_[i] = new Node(static_cast<int>(i), dists.getName(i));
    }
    int idNextNode = dists.size();
    vector<double> newDist(dists.size());
    vector<double> newVar(dists.size());

    // Build tree:
    while (currentNodes_.size() > 3) {
        // get best pair
        for (std::map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) {
            size_t id = i->first;
            sumDist_[id] = 0;
            for (map<size_t, Node*>::iterator j = currentNodes_.begin(); j != currentNodes_.end(); j++) {
                size_t jd = j->first;
                sumDist_[id] += dists(id, jd);
            }
        }
        vector<size_t> bestPair(2);
        double critMax = std::log(0.);
        for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) {
            size_t id = i->first;
            map<size_t, Node*>::iterator j = i;
            j++;
            for ( ; j != currentNodes_.end(); j++) {
                size_t jd = j->first;
                double crit = sumDist_[id] + sumDist_[jd] - static_cast<double>(currentNodes_.size() - 2) * dists(id, jd);
                // cout << "\t" << id << "\t" << jd << "\t" << crit << endl;
                if (crit > critMax) {
                    critMax = crit;
                    bestPair[0] = id;
                    bestPair[1] = jd;
                }
            }
        }
        if (critMax == std::log(0.)) throw Exception("Unexpected error: no maximum criterium found.");

        // get branch lengths for pair
        double ratio = (sumDist_[bestPair[0]] - sumDist_[bestPair[1]]) / static_cast<double>(currentNodes_.size() - 2);
        vector<double> d(2);

        d[0] = std::max(.5 * (dists(bestPair[0], bestPair[1]) + ratio), MIN_BRANCH_LENGTH);
        d[1] = std::max(.5 * (dists(bestPair[0], bestPair[1]) - ratio), MIN_BRANCH_LENGTH);

        Node* best1 = currentNodes_[bestPair[0]];
        Node* best2 = currentNodes_[bestPair[1]];

        // Distances may be used by getParentNodes (PGMA for instance).
        best1->setDistanceToFather(d[0]);
        best2->setDistanceToFather(d[1]);
        Node* parent = new Node(idNextNode++);
        parent->addSon(best1);
        parent->addSon(best2);

        // compute lambda
        lambda_ = 0;
        if (vars(bestPair[0], bestPair[1]) == 0) lambda_ = .5;
        else {
            for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) {
                size_t id = i->first;
                if (id != bestPair[0] && id != bestPair[1]) lambda_ += (vars(bestPair[1], id) - vars(bestPair[0], id));
            }
            double div = 2 * static_cast<double>(currentNodes_.size() - 2) * vars(bestPair[0], bestPair[1]);
            lambda_ /= div;
            lambda_ += .5;
        }
        if (lambda_ < 0.) lambda_ = 0.;
        if (lambda_ > 1.) lambda_ = 1.;

        for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) {
            size_t id = i->first;
            if (id != bestPair[0] && id != bestPair[1]) {
                newDist[id] = std::max(lambda_ * (dists(bestPair[0], id) - d[0]) + (1 - lambda_) * (dists(bestPair[1], id) - d[1]), 0.);
                newVar[id] = lambda_ * vars(bestPair[0], id) + (1 - lambda_) * vars(bestPair[1], id) - lambda_ * (1 - lambda_) * vars(bestPair[0], bestPair[1]);
            }
          else newDist[id] = 0;
        }
        // Actualize currentNodes_:
        currentNodes_[bestPair[0]] = parent;
        currentNodes_.erase(bestPair[1]);
        for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) {
            size_t id = i->first;
            dists(bestPair[0], id) = dists(id, bestPair[0]) = newDist[id];
            vars(bestPair[0], id) =  vars(id, bestPair[0]) = newVar[id];
        }
    }
    // final step
    Node* root = new Node(idNextNode);
    map<size_t, Node* >::iterator it = currentNodes_.begin();
    size_t i1 = it->first;
    Node* n1       = it->second;
    it++;
    size_t i2 = it->first;
    Node* n2       = it->second;
    if (currentNodes_.size() == 2) {
        // Rooted
        double d = dists(i1, i2) / 2;
        root->addSon(n1);
        root->addSon(n2);
        n1->setDistanceToFather(d);
        n2->setDistanceToFather(d);
    }
    else {
        // Unrooted
        it++;
        size_t i3 = it->first;
        Node* n3       = it->second;
        double d1 = std::max(dists(i1, i2) + dists(i1, i3) - dists(i2, i3), MIN_BRANCH_LENGTH);
        double d2 = std::max(dists(i2, i1) + dists(i2, i3) - dists(i1, i3), MIN_BRANCH_LENGTH);
        double d3 = std::max(dists(i3, i1) + dists(i3, i2) - dists(i1, i2), MIN_BRANCH_LENGTH);
        root->addSon(n1);
        root->addSon(n2);
        root->addSon(n3);
        n1->setDistanceToFather(d1 / 2.);
        n2->setDistanceToFather(d2 / 2.);
        n3->setDistanceToFather(d3 / 2.);
    }
    Tree *tree_ = new TreeTemplate<Node>(root);
    stringstream ss;
    Newick treeWriter;
    if (!tree_) throw Exception("The tree is empty");
    treeWriter.write(*tree_, ss);
    delete tree_;
    string s{ss.str()};
    s.erase(s.find_last_not_of(" \n\r\t")+1);
    return s;
}