void testUpdateDistanceMatrix() { PeakListCollection PLC = SamplePeakListCollection(); DistanceMatrix DM = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.); DistanceMatrix Ref = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.); DM.addElement(99.); //Add a dummy element, we will delete it again unsigned int merged_lower = 4; //last two elements unsigned int merged_upper = 5; PLC.plContent_.clear(); PLC.plContent_.resize(6,1); PLC.updateDistanceMatrix_(DM, merged_lower, merged_upper, 200., 0.5, 0, 0.); //Size should still be 5 shouldEqual((int)DM.size(),5); //Matrix should be the same as the original matrix Ref for(unsigned int i = 0; i < 5; i++){ for(unsigned int j = 0; j < i; j++){ shouldEqualTolerance(DM(i,j),Ref(i,j),TOL); } } return; }
void TreeTools::midpointRooting(Tree& tree) { throw Exception("TreeTools::midpointRooting(Tree). This function is deprecated, use TreeTemplateTools::midRoot instead!"); if (tree.isRooted()) tree.unroot(); DistanceMatrix* dist = getDistanceMatrix(tree); vector<size_t> pos = MatrixTools::whichMax(dist->asMatrix()); double dmid = (*dist)(pos[0], pos[1]) / 2; int id1 = tree.getLeafId(dist->getName(pos[0])); int id2 = tree.getLeafId(dist->getName(pos[1])); int rootId = tree.getRootId(); double d1 = getDistanceBetweenAnyTwoNodes(tree, id1, rootId); double d2 = getDistanceBetweenAnyTwoNodes(tree, id2, rootId); int current = d2 > d1 ? id2 : id1; delete dist; double l = tree.getDistanceToFather(current); double c = l; while (c < dmid) { current = tree.getFatherId(current); l = tree.getDistanceToFather(current); c += l; } tree.newOutGroup(current); int brother = tree.getSonsId(tree.getRootId())[1]; if (brother == current) brother = tree.getSonsId(tree.getRootId())[0]; tree.setDistanceToFather(current, l - (c - dmid)); tree.setDistanceToFather(brother, c - dmid); }
DBL_MATRIX PeakListCollection::mergeAll(double drt, double dmz, double dz, double dint) { //preprocessing: delete empty PeakLists from c_ for(unsigned int i = 0; i < c_.size(); i++){ if(c_[i].size() == 0){ c_.erase(c_.begin() + i); } } unsigned int oldSize = c_.size(); // initialization //fill correspondenceMap with rt-values correspondenceMap_.clear(); correspondenceMap_.resize(oldSize); for(unsigned int i = 0; i < oldSize; i++){ correspondenceMap_[i].resize(c_[i].size()); for(unsigned int j = 0; j < c_[i].size(); j++){ //create map_item to write to correspondenceMap map_item tempItem; //origin information contains the PeakList index and rt value originInformation o; o.rt = c_[i][j].getRt(); o.mz = c_[i][j].getMz(); o.intensity = c_[i][j].getAbundance(); o.originPeakList = i; o.originPeak = j; tempItem.push_back( o ); correspondenceMap_[i][j] = tempItem; } } //PeakLists are not merged in the beginning --> fill with 1 plContent_.clear(); plContent_.resize(oldSize,1); //build distance matrix DistanceMatrix D = buildDistanceMatrix_(drt,dmz,dz,dint); //merge, until there is only one PeakList left while(D.size() > 1){ //find cheapest assignment unsigned int merge_lower = D.getMin_LowerIndex(); unsigned int merge_upper = D.getMin_HigherIndex(); //merge peaklists into a new one mergePeakLists_(merge_lower, merge_upper, c_, drt, dmz, dz, dint); //update distance matrix updateDistanceMatrix_(D, merge_lower, merge_upper, drt, dmz, dz, dint); } return calculateRtCorrespondencesFromCorrespondenceMap_(oldSize); }
// Find a 1:1 mapping from rows to columns of the specified square matrix such that the total cost is minimized. Returns a // vector V such that V[i] = j maps rows i to columns j. static std::vector<long> findMinimumAssignment(const DistanceMatrix &matrix) { #ifdef ROSE_HAVE_DLIB ASSERT_forbid(matrix.size() == 0); ASSERT_require(matrix.nr() == matrix.nc()); // We can avoid the O(n^3) Kuhn-Munkres algorithm if all values of the matrix are the same. double minValue, maxValue; dlib::find_min_and_max(matrix, minValue /*out*/, maxValue /*out*/); if (minValue == maxValue) { std::vector<long> ident; ident.reserve(matrix.nr()); for (long i=0; i<matrix.nr(); ++i) ident.push_back(i); return ident; } // Dlib's Kuhn-Munkres finds the *maximum* mapping over *integers*, so we negate everything to find the minumum, and we map // the doubles onto a reasonably large interval of integers. The interval should be large enough to have some precision, // but not so large that things might overflow. const int iGreatest = 1000000; // arbitrary upper bound for integer interval dlib::matrix<long> intMatrix(matrix.nr(), matrix.nc()); for (long i=0; i<matrix.nr(); ++i) { for (long j=0; j<matrix.nc(); ++j) intMatrix(i, j) = round(-iGreatest * (matrix(i, j) - minValue) / (maxValue - minValue)); } return dlib::max_cost_assignment(intMatrix); #else throw FunctionSimilarity::Exception("dlib support is necessary for FunctionSimilarity analysis" "; see ROSE installation instructions"); #endif }
// Given a square matrix and a 1:1 mapping from rows to columns, return the total cost of the mapping. static double totalAssignmentCost(const DistanceMatrix &matrix, const std::vector<long> assignment) { double sum = 0.0; ASSERT_require(matrix.nr() == matrix.nc()); ASSERT_require((size_t)matrix.nr() == assignment.size()); for (long i=0; i<matrix.nr(); ++i) { ASSERT_require(assignment[i] < matrix.nc()); sum += matrix(i, assignment[i]); } return sum; }
int main(int argc, char **argv) { welcome(); defineverbose(); filenames.clear(); ClustalWInitializers(); clustalw::ClustalWResources *resources = clustalw::ClustalWResources::Instance(); resources->setPathToExecutable(string(argv[0])); setUserParameters(); InputFile start;// object reads from an input file and creates a new file start.run(); string offendingSeq; Clustal* clustalObj; clustalObj = new clustalw::Clustal(); int u = clustalObj->sequenceInput(false, &offendingSeq); string phylipName; clustalObj->align(&phylipName); AminoAcidFrequency Table; Table.openFile("TEST-OutputFile.aln"); vector<ProteinSequence> p1; p1 = start.getProteinData(); Table.generateAminoAcidTables(p1); p1.clear(); p1 = Table.getFinalSeqs(); DistanceMatrix dm; dm.createDistanceTableCodes(p1); dm.createSimilarityMatrixCodes(p1); dm.createDistanceTableColours(p1); dm.createSimilarityMatrixColours(p1); VerticalPosition vp; vp.run(p1); cout<< "\nPROCESS COMPLETED!\nThe following files were created:\n" << endl; for ( size_t i = 0; i < filenames.size(); i++ ) { cout << "\t" << i+1 << ". " << filenames[i] << endl; } return 0; }
void testBuildDistanceMatrix() { PeakListCollection PLC = SamplePeakListCollection(); DistanceMatrix DM = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.); //PeakList 4 should have no Peaks in common with the other lists --> Cost == DBL_MAX for(unsigned int i = 0; i < DM.size()-1; i++){ shouldEqual(DM(i,4),DBL_MAX); } //distance PL0 - PL2 should be sqrt(0.1^2/0.5^2 + 30^2/200^2) = 0.25 shouldEqualTolerance(DM(0,2),0.25,TOL); shouldEqualTolerance(DM(2,0),0.25,TOL); return; }
//build a distance matrix out of all PeakLists DistanceMatrix PeakListCollection::buildDistanceMatrix_(double drt, double dmz, double dz, double dint = 0.) { //number of Peaklists unsigned int numPl = c_.size(); DistanceMatrix DM; //Add new elements to the Distance Matrix for(unsigned int i = 0; i<numPl; i++){ DM.addElement(); //Calculate distance to the other elements / the cost of merging them for(unsigned int j = 0; j<i; j++){ StableMarriage sm(c_[i],c_[j],drt,dmz,dz,dint); sm.setLimit(1.); DM(i,j) = sm.getCost(); } } return DM; }
//Update the Distance Matrix void PeakListCollection::updateDistanceMatrix_(DistanceMatrix& D, unsigned int merged_lower, unsigned int merged_upper, double drt, double dmz, double dz, double dint = 0.) { //remove distances of merged PeakLists from distance matrix D.deleteElement(merged_upper); D.deleteElement(merged_lower); //add a new column for the merged PeakList D.addElement(); //fill distances unsigned int sz = D.size(); for(unsigned int i = 0; i<sz-1; i++){ StableMarriage sm(c_[i],c_[sz-1],drt,dmz,dz,dint); sm.setLimit(1.); D(i,sz-1) = sm.getCost(); } return; }
DBL_MATRIX PeakListCollection::getAlignment(double drt, double dmz, double dz, double dint, AccelerationFlag flag = NORMAL, int param = 0) { //Create a copy of c_ to work on std::vector<PeakList> pls = c_; unsigned int oldSize = pls.size(); // initialization //fill correspondenceMap with rt-values correspondenceMap_.clear(); correspondenceMap_.resize(oldSize); for(unsigned int i = 0; i < oldSize; i++){ correspondenceMap_[i].resize(pls[i].size()); for(unsigned int j = 0; j < pls[i].size(); j++){ //create map_item to write to correspondenceMap map_item tempItem; //origin information contains the PeakList index and rt value originInformation o; o.rt = c_[i][j].getRt(); o.mz = c_[i][j].getMz(); o.intensity = c_[i][j].getAbundance(); o.originPeakList = i; o.originPeak = j; tempItem.push_back( o ); correspondenceMap_[i][j] = tempItem; } } //PeakLists are not merged in the beginning --> fill with 1 plContent_.clear(); plContent_.resize(oldSize,1); DistanceMatrix D; if(flag == NORMAL){ //build distance matrix D = buildDistanceMatrix_(drt,dmz,dz,dint); } if(flag == REFERENCE){ //first, merge PeakList n with one other --> last PeakList contains PeakList n if(param==0){ //select first two peaklists unsigned int merge_lower = param; unsigned int merge_upper = 1; //merge peaklists into a new one mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint); }else{ //select first and n'th PeakList unsigned int merge_lower = 0; unsigned int merge_upper = param; //merge peaklists into a new one mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint); } } //merge, until there is only one PeakList left bool exit = false; while(!exit){ //find cheapest assignment unsigned int merge_lower = 0; unsigned int merge_upper = 1; switch(flag){ case NORMAL: merge_lower = D.getMin_LowerIndex(); merge_upper = D.getMin_HigherIndex(); break; case FAST: //select last two PeakList merge_lower = pls.size()-2; merge_upper = pls.size()-1; break; case REFERENCE: //select last two PeakList merge_lower = pls.size()-2; merge_upper = pls.size()-1; break; } //merge peaklists into a new one mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint); switch(flag){ case NORMAL: //update distance matrix updateDistanceMatrix_(D, merge_lower, merge_upper, drt, dmz, dz, dint); if(D.size()==1){ exit = true; } break; case FAST: if(pls.size()==1){ exit = true; } break; case REFERENCE: if(pls.size()==1){ exit = true; } break; } } DBL_MATRIX rtc = calculateRtCorrespondencesFromCorrespondenceMap_(oldSize); return rtc; }
pair<vector<int>, double> DistanceMatrix::compareAllWindows(DistanceMatrix &_distMat, int choice){ vector<MatrixWindow*> listOther = _distMat.getMatrixWindows(); int length = listMW.size(); int lengthOther = listOther.size(); vector<int> minCoord(2, 0.0); MatrixWindow *minWindow1 = NULL; MatrixWindow *minWindow2 = NULL; double minLikeness= MslTools::doubleMax; for (int i =0; i<length; i++){//loops through listMW to get comparee MatrixWindow *win1 = listMW[i]; for (int j=0; j<lengthOther; j++){//loops through listOther to get comparor MatrixWindow *win2 = listOther[j]; double likeness; //decides which compare method from MatrixWindow to call switch(choice){ case standard: likeness = (*win1).compare((*win2)); break; case diag: likeness = (*win1).compareDiagonal((*win2)); break; case doubleDiag: likeness = (*win1).compareDoubleDiagonal((*win2)); break; case minDist: likeness = (*win1).compareMinDist((*win2)); break; case minDistRow: likeness=(*win1).compareMinRow((*win2)); break; case minDistCol: likeness=(*win1).compareMinCol((*win2)); break; default: cout<<"Invalid argument in function DistanceMatrix::compareAll()."<<endl; exit(333); }//end switch if (likeness<minLikeness && abs(likeness - minLikeness) > 0.001){ cout << "New likeness: "<<likeness<<endl; minLikeness=likeness; minWindow1 = win1; minWindow2 = win2; minCoord[0]= i; minCoord[1]=j; }//endif win2=NULL; }//end for on j win1= NULL; }//end for on i pair<vector<int>, double> coordsAndLikeness(minCoord, minLikeness); minWindow1=NULL; minWindow2=NULL; return coordsAndLikeness; }
//must add segID void DistanceMatrix::printCompareInfo(DistanceMatrix &_distMat, pair<vector<int>, double> _result, int choice){ //retrieve values from the pair vector<int> mwIndex(2, 0.0); mwIndex= _result.first; double minLikeness = _result.second; //retrieve the winning Matrix Windows vector<MatrixWindow*> listMW2 = _distMat.getMatrixWindows(); MatrixWindow *minWindow1 = listMW[mwIndex[0]]; MatrixWindow *minWindow2 = listMW2[mwIndex[1]]; //print information int i1 = (*minWindow1).getLeftR(); int j1 = (*minWindow1).getLeftC(); int i2 = (*minWindow2).getLeftR(); int j2 = (*minWindow2).getLeftC(); string i1ID = atomVec[i1]->getSegID().c_str(); string j1ID = atomVec[j1]->getSegID().c_str(); string i2ID = _distMat.getAtomVector()[i2]->getSegID().c_str(); string j2ID = _distMat.getAtomVector()[j2]->getSegID().c_str(); if(i1ID=="" || j1ID=="" || i2ID=="" ||j2ID==""){ i1ID = atomVec[i1]->getChainId().c_str(); j1ID = atomVec[j1]->getChainId().c_str(); i2ID = _distMat.getAtomVector()[i2]->getChainId().c_str(); j2ID = _distMat.getAtomVector()[j2]->getChainId().c_str(); } int i1res = atomVec[i1]->getResidueNumber(); int j1res = atomVec[j1]->getResidueNumber(); int i2res = _distMat.getAtomVector()[i2]->getResidueNumber(); int j2res = _distMat.getAtomVector()[j2]->getResidueNumber(); string PDBname= getFileName(PDBid); string PDBnameShort = PDBname.substr(0,17); string PDBname2 = getFileName(_distMat.getPDBid()); string PDBnameShort2 = PDBname2.substr(0,17); cout<<"Comparing PDBs "<<PDBnameShort<<", "<<PDBnameShort2<<endl; switch(choice){ case standard: fprintf(stdout, "Standard compare:\t\tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n", i1, j1, i1ID.c_str(), i1res, j1ID.c_str(), j1res, i2, j2, i2ID.c_str(), i2res, j2ID.c_str(), j2res, minLikeness); break; case diag: fprintf(stdout, "Diagonal compare: \t\tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n", i1, j1, i1ID.c_str(), i1res, j1ID.c_str(), j1res, i2, j2, i2ID.c_str(), i2res, j2ID.c_str(), j2res, minLikeness); break; case doubleDiag: fprintf(stdout, "Double Diagonal compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n", i1, j1, i1ID.c_str(), i1res, j1ID.c_str(), j1res, i2, j2, i2ID.c_str(), i2res, j2ID.c_str(), j2res, minLikeness); break; case minDist: fprintf(stdout, "Minimum Distance compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n", i1, j1, i1ID.c_str(), i1res, j1ID.c_str(), j1res, i2, j2, i2ID.c_str(), i2res, j2ID.c_str(), j2res, minLikeness); break; case minDistRow: fprintf(stdout, "Minimum Distance Row compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n", i1, j1, i1ID.c_str(), i1res, j1ID.c_str(), j1res, i2, j2, i2ID.c_str(), i2res, j2ID.c_str(), j2res, minLikeness); break; case minDistCol: fprintf(stdout, "Minimum Distance Column compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n", i1, j1, i1ID.c_str(), i1res, j1ID.c_str(), j1res, i2, j2, i2ID.c_str(), i2res, j2ID.c_str(), j2res, minLikeness); break; default: cout<<"Error. Incorrect int value (choice) in DistanceMatrix::printCompareInfo(...)"<<endl; exit(334); }//end switch }
vector<DistanceMatrixResult> DistanceMatrix::multiCompareAllWindows(DistanceMatrix &_distMat, int choice, int _numCompare){ /* //which chains to skip: map<string, bool> forbiddenIDMat1; map<string, bool> forbiddenIDMat2; // Maintain the proper spacing between residues of different matrix window pairs. map<string,int> properRegisterRow; map<string,int> properRegisterCol; map<string,int>::iterator findRegistry; */ //get list of MWs to compare vector<MatrixWindow*> listOther = _distMat.getMatrixWindows(); int length = listMW.size(); int lengthOther = listOther.size(); //vector of objects to return vector<DistanceMatrixResult> returnVec; //loop over number of times to compare all for(int k=0; k<_numCompare; k++){ //minimum windows and indices MatrixWindow *minWindow1 = NULL; MatrixWindow *minWindow2 = NULL; double minLikeness = 1000000; int minIndex1=0; int minIndex2=0; //IDs to skip in the future string i1IDWin; string j1IDWin; string i2IDWin; string j2IDWin; //which chains to skip: map<string, bool> forbiddenIDMat1; map<string, bool> forbiddenIDMat2; // Maintain the proper spacing between residues of different matrix window pairs. map<string,int> properRegisterRow; map<string,int> properRegisterCol; map<string,int>::iterator findRegistry; for (int i =0; i<length; i++){//loops through listMW to get compare MatrixWindow *win1 = listMW[i]; for (int j=0; j<lengthOther; j++){//loops through listOther to get comparor MatrixWindow *win2 = listOther[j]; //get the ID (seg or chain) so we can filter ones we want to skip int i1 = win1->getLeftR(); int j1 = win1->getLeftC(); int i2 = win2->getLeftR(); int j2 = win2->getLeftC(); string i1ID = atomVec[i1]->getSegID(); string j1ID = atomVec[j1]->getSegID(); string i2ID = _distMat.getAtomVector()[i2]->getSegID(); string j2ID = _distMat.getAtomVector()[j2]->getSegID(); if(i1ID=="" || j1ID=="" || i2ID=="" ||j2ID==""){ i1ID = atomVec[i1]->getChainId(); j1ID = atomVec[j1]->getChainId(); i2ID = _distMat.getAtomVector()[i2]->getChainId(); j2ID = _distMat.getAtomVector()[j2]->getChainId(); }//end if // Skip if both chains are forbidden within a matrix if (forbiddenIDMat1.find(i1ID+":"+j1ID)!=forbiddenIDMat1.end() || forbiddenIDMat2.find(i2ID+":"+j2ID)!=forbiddenIDMat2.end()) continue; // Skip if both inter-matrix segids found and if difference in residue number is not the same. findRegistry = properRegisterRow.find(i1ID+":"+i2ID); double diffInResidueNumber = atomVec[i1]->getResidueNumber() - _distMat.getAtomVector()[i2]->getResidueNumber(); if (findRegistry != properRegisterRow.end() && findRegistry->second != diffInResidueNumber) continue; findRegistry = properRegisterCol.find(j1ID+":"+j2ID); diffInResidueNumber = atomVec[j1]->getResidueNumber() - _distMat.getAtomVector()[j2]->getResidueNumber(); if (findRegistry != properRegisterCol.end() && findRegistry->second != diffInResidueNumber) continue; double likeness; //decides which compare method from MatrixWindow to call switch(choice){ case standard: likeness = (*win1).compare((*win2)); break; case diag: likeness = (*win1).compareDiagonal((*win2)); break; case doubleDiag: likeness = (*win1).compareDoubleDiagonal((*win2)); break; case minDist: likeness = (*win1).compareMinDist((*win2)); break; case minDistRow: likeness=(*win1).compareMinRow((*win2)); break; case minDistCol: likeness=(*win1).compareMinCol((*win2)); break; default: cout<<"Invalid argument in function DistanceMatrix::compareAll()."<<endl; exit(333); }//end switch if (likeness<minLikeness && abs(likeness - minLikeness) > 0.001){ minLikeness = likeness; minWindow1 = win1; minWindow2 = win2; minIndex1 = i; minIndex2 = j; //set the ID to avoid in the future i1IDWin = i1ID; j1IDWin = j1ID; i2IDWin = i2ID; j2IDWin = j2ID; }//endif win2=NULL; }//end for on j win1= NULL; }//end for on i // Allowed inter matrix identifier[SEGID] difference in residue numbers vector<int> residueNumbers1 = minWindow1->getUpLeftResidueNumbers(); vector<int> residueNumbers2 = minWindow2->getUpLeftResidueNumbers(); properRegisterRow[i1IDWin+":"+i2IDWin] = (residueNumbers1[0] - residueNumbers2[0]); properRegisterRow[i2IDWin+":"+i1IDWin] = - (residueNumbers1[0] - residueNumbers2[0]); properRegisterCol[j1IDWin+":"+j2IDWin] = (residueNumbers1[1] - residueNumbers2[1]); properRegisterCol[j2IDWin+":"+j1IDWin] = - (residueNumbers1[1] - residueNumbers2[1]); // Forbidden intra matrix identifier[SEGID] pairs forbiddenIDMat1[i1IDWin+":"+j1IDWin] = false; forbiddenIDMat1[j1IDWin+":"+i1IDWin] = false; forbiddenIDMat2[i2IDWin+":"+j2IDWin] = false; forbiddenIDMat2[j2IDWin+":"+i2IDWin] = false; DistanceMatrixResult currentResult(*this, *minWindow1, _distMat, *minWindow2, minLikeness); returnVec.push_back(currentResult); minWindow1=NULL; minWindow2=NULL; }//end for on k return returnVec; }
TreeTemplate<Node>* OptimizationTools::buildDistanceTree( DistanceEstimation& estimationMethod, AgglomerativeDistanceMethod& reconstructionMethod, const ParameterList& parametersToIgnore, bool optimizeBrLen, const std::string& param, double tolerance, unsigned int tlEvalMax, OutputStream* profiler, OutputStream* messenger, unsigned int verbose) throw (Exception) { estimationMethod.resetAdditionalParameters(); estimationMethod.setVerbose(verbose); if (param == DISTANCEMETHOD_PAIRWISE) { ParameterList tmp = estimationMethod.getSubstitutionModel().getIndependentParameters(); tmp.addParameters(estimationMethod.getRateDistribution().getIndependentParameters()); tmp.deleteParameters(parametersToIgnore.getParameterNames()); estimationMethod.setAdditionalParameters(tmp); } TreeTemplate<Node>* tree = NULL; TreeTemplate<Node>* previousTree = NULL; bool test = true; while (test) { // Compute matrice: if (verbose > 0) ApplicationTools::displayTask("Estimating distance matrix", true); estimationMethod.computeMatrix(); DistanceMatrix* matrix = estimationMethod.getMatrix(); if (verbose > 0) ApplicationTools::displayTaskDone(); // Compute tree: if (matrix->size() == 2) { //Special case, there is only one possible tree: Node* n1 = new Node(0); Node* n2 = new Node(1, matrix->getName(0)); n2->setDistanceToFather((*matrix)(0,0) / 2.); Node* n3 = new Node(2, matrix->getName(1)); n3->setDistanceToFather((*matrix)(0,0) / 2.); n1->addSon(n2); n1->addSon(n3); tree = new TreeTemplate<Node>(n1); break; } if (verbose > 0) ApplicationTools::displayTask("Building tree"); reconstructionMethod.setDistanceMatrix(*matrix); reconstructionMethod.computeTree(); previousTree = tree; delete matrix; tree = dynamic_cast<TreeTemplate<Node>*>(reconstructionMethod.getTree()); if (verbose > 0) ApplicationTools::displayTaskDone(); if (previousTree && verbose > 0) { int rf = TreeTools::robinsonFouldsDistance(*previousTree, *tree, false); ApplicationTools::displayResult("Topo. distance with previous iteration", TextTools::toString(rf)); test = (rf == 0); delete previousTree; } if (param != DISTANCEMETHOD_ITERATIONS) break; // Ends here. // Now, re-estimate parameters: auto_ptr<SubstitutionModel> model(estimationMethod.getSubstitutionModel().clone()); auto_ptr<DiscreteDistribution> rdist(estimationMethod.getRateDistribution().clone()); DRHomogeneousTreeLikelihood tl(*tree, *estimationMethod.getData(), model.get(), rdist.get(), true, verbose > 1); tl.initialize(); ParameterList parameters = tl.getParameters(); if (!optimizeBrLen) { vector<string> vs = tl.getBranchLengthsParameters().getParameterNames(); parameters.deleteParameters(vs); } parameters.deleteParameters(parametersToIgnore.getParameterNames()); optimizeNumericalParameters(&tl, parameters, NULL, 0, tolerance, tlEvalMax, messenger, profiler, verbose > 0 ? verbose - 1 : 0); if (verbose > 0) { ParameterList tmp = tl.getSubstitutionModelParameters(); for (unsigned int i = 0; i < tmp.size(); i++) { ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue())); } tmp = tl.getRateDistributionParameters(); for (unsigned int i = 0; i < tmp.size(); i++) { ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue())); } } } return tree; }
int main(int argc, char *argv[]){ // Option Parser Options opt = setupOptions(argc,argv); ifstream fs2; //create system and dm for first PDB PDBReader reader; reader.open(opt.inputPDB); reader.read(); reader.close(); System *constSys = new System(reader.getAtoms()); DistanceMatrix constDM; //add CA atoms to the atom vectors for (int j=0; j<constSys->residueSize(); j++){ Residue &tempRes=constSys->getResidue(j); if (tempRes.exists("CA")){ constDM.addAtom(tempRes("CA")); } }//end for on j //fill the DistanceMatrix and set window size constDM.setGeneralWinSize(opt.windowSize); constDM.createDistanceMatrix(); constDM.setIntraChain(opt.intraChainCompare); constDM.setPDBid(opt.inputPDB); constDM.setDebug(opt.debug); //create matrix windows constDM.createMatrixWindows(); delete(constSys); if (constDM.getMatrixWindows().size()==0){ cout<<"Uh-oh.All the windows got filtered in the PDB you wanted to compare against."<<endl; exit(111); } // COMMMENT OUT BEGINS /* //read in list of PDBs to compare to first PDB vector<string> list; ifstream fs; fs.open(opt.pdbList.c_str()); if (fs.fail()){ cerr<<"Cannot open file "<<opt.pdbList<<endl; exit(1); } while(true){ string line; getline(fs, line); if(fs.fail()){ //no more lines to read, quite the while. break; } if(line==""){ continue; } list.push_back(line); } fs.close(); // List of distance matrices, one for each PDB vector<DistanceMatrix> DMVec(list.size()); // A system object for each PDB vector<System*> sysVec(list.size(), NULL); // Create DistanceMatrix and System Objects for list of PDBs for(int i=0; i<list.size(); i++){ cout<<i<<"create sys and dm."<<endl; PDBReader rAv; rAv.open(list[i]); rAv.read(); rAv.close(); sysVec[i] =new System(rAv.getAtoms()); //add CA atoms to the atom vectors for (int j=0; j<sysVec[i]->residueSize(); j++){ Residue &tempRes=sysVec[i]->getResidue(j); if (tempRes.exists("CA")){ //only add CA if it is on a helix string segID = tempRes("CA").getSegID(); //if(segID == "" || segID.at(0) == 'H'){ DMVec[i].addAtom(tempRes("CA")); // } } }//end for on j //fill the DistanceMatrix and set window size DMVec[i].setGeneralWinSize(opt.windowSize); DMVec[i].createDistanceMatrix(); DMVec[i].setIntraChain(opt.intraChainCompare); DMVec[i].setPDBid(list[i]); //create matrix windows DMVec[i].createMatrixWindows(); delete(sysVec[i]); }//end for on i */ // COMMMENT OUT ENDS // NEW BEGINS // load distance matrix database from an external binary file DistanceMatrixDatabase dmd; dmd.load_checkpoint("try.bin"); // List of distance matrices, one for each PDB //************* from now on, all DMVec become pointers ****************// vector<DistanceMatrix *> &DMVec = dmd.getDistanceMatrixList(); /* for (uint i = 0;i < dms.size();i++){ cout << "DM["<<i<<"]: "<<dms[i]->getPDBid()<<endl; } cout << "Done"<<endl; */ //ManageResults to take care of printing/sorting at end ManageDistanceMatrixResults resultManager; for(int i=0; i<DMVec.size(); i++){ cout<< "Trying "<<DMVec[i]->getPDBid()<<" ("<<i<<") # Residues: "<<DMVec.size()<<" Number of MatrixWindows to compare: "<<DMVec[i]->getMatrixWindows().size(); //don't compare if all of the windows got filtered out if (DMVec[i]->getMatrixWindows().size() == 0){ cout << " Sorry Zero Matrix Windows !"<<endl; continue; } cout <<endl; vector<DistanceMatrixResult> resultsToAdd; if(opt.searchCriteria=="standard"){ resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::standard, opt.numberOfIterations); }//end if if(opt.searchCriteria=="diagonal"){ resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::diag, opt.numberOfIterations); } if(opt.searchCriteria=="doubleDiagonal"){ resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::doubleDiag, opt.numberOfIterations); } if(opt.searchCriteria=="minDistance"){ resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::minDist, opt.numberOfIterations); } if(opt.searchCriteria=="minDistanceRow"){ resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::minDistRow, opt.numberOfIterations); } bool addFlag = false; for (uint j = 0; j< resultsToAdd.size();j++){ if (opt.likenessTolerance == MslTools::doubleMax || resultsToAdd[j].getLikeness() <= opt.likenessTolerance){ addFlag = true; break; } } if (addFlag && resultsToAdd.size() > 0){ resultManager.addResults(resultsToAdd); } }//end for on i // NEW ENDS cout << "Printing"<<endl; resultManager.setAlignPdbs(opt.alignPdbs); // cout << "hello"<<endl; resultManager.setRmsdTol(opt.rmsdTol); // cout << "hello again"<<endl; resultManager.printResults(); cout << "Done."<<endl; return 0; }
string Alignment::_computeTree(DistanceMatrix dists, DistanceMatrix vars) throw (Exception) { // Initialization: std::map<size_t, Node*> currentNodes_; std::vector<double> sumDist_(dists.size()); double lambda_; for (size_t i = 0; i < dists.size(); i++) { currentNodes_[i] = new Node(static_cast<int>(i), dists.getName(i)); } int idNextNode = dists.size(); vector<double> newDist(dists.size()); vector<double> newVar(dists.size()); // Build tree: while (currentNodes_.size() > 3) { // get best pair for (std::map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) { size_t id = i->first; sumDist_[id] = 0; for (map<size_t, Node*>::iterator j = currentNodes_.begin(); j != currentNodes_.end(); j++) { size_t jd = j->first; sumDist_[id] += dists(id, jd); } } vector<size_t> bestPair(2); double critMax = std::log(0.); for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) { size_t id = i->first; map<size_t, Node*>::iterator j = i; j++; for ( ; j != currentNodes_.end(); j++) { size_t jd = j->first; double crit = sumDist_[id] + sumDist_[jd] - static_cast<double>(currentNodes_.size() - 2) * dists(id, jd); // cout << "\t" << id << "\t" << jd << "\t" << crit << endl; if (crit > critMax) { critMax = crit; bestPair[0] = id; bestPair[1] = jd; } } } if (critMax == std::log(0.)) throw Exception("Unexpected error: no maximum criterium found."); // get branch lengths for pair double ratio = (sumDist_[bestPair[0]] - sumDist_[bestPair[1]]) / static_cast<double>(currentNodes_.size() - 2); vector<double> d(2); d[0] = std::max(.5 * (dists(bestPair[0], bestPair[1]) + ratio), MIN_BRANCH_LENGTH); d[1] = std::max(.5 * (dists(bestPair[0], bestPair[1]) - ratio), MIN_BRANCH_LENGTH); Node* best1 = currentNodes_[bestPair[0]]; Node* best2 = currentNodes_[bestPair[1]]; // Distances may be used by getParentNodes (PGMA for instance). best1->setDistanceToFather(d[0]); best2->setDistanceToFather(d[1]); Node* parent = new Node(idNextNode++); parent->addSon(best1); parent->addSon(best2); // compute lambda lambda_ = 0; if (vars(bestPair[0], bestPair[1]) == 0) lambda_ = .5; else { for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) { size_t id = i->first; if (id != bestPair[0] && id != bestPair[1]) lambda_ += (vars(bestPair[1], id) - vars(bestPair[0], id)); } double div = 2 * static_cast<double>(currentNodes_.size() - 2) * vars(bestPair[0], bestPair[1]); lambda_ /= div; lambda_ += .5; } if (lambda_ < 0.) lambda_ = 0.; if (lambda_ > 1.) lambda_ = 1.; for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) { size_t id = i->first; if (id != bestPair[0] && id != bestPair[1]) { newDist[id] = std::max(lambda_ * (dists(bestPair[0], id) - d[0]) + (1 - lambda_) * (dists(bestPair[1], id) - d[1]), 0.); newVar[id] = lambda_ * vars(bestPair[0], id) + (1 - lambda_) * vars(bestPair[1], id) - lambda_ * (1 - lambda_) * vars(bestPair[0], bestPair[1]); } else newDist[id] = 0; } // Actualize currentNodes_: currentNodes_[bestPair[0]] = parent; currentNodes_.erase(bestPair[1]); for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); i++) { size_t id = i->first; dists(bestPair[0], id) = dists(id, bestPair[0]) = newDist[id]; vars(bestPair[0], id) = vars(id, bestPair[0]) = newVar[id]; } } // final step Node* root = new Node(idNextNode); map<size_t, Node* >::iterator it = currentNodes_.begin(); size_t i1 = it->first; Node* n1 = it->second; it++; size_t i2 = it->first; Node* n2 = it->second; if (currentNodes_.size() == 2) { // Rooted double d = dists(i1, i2) / 2; root->addSon(n1); root->addSon(n2); n1->setDistanceToFather(d); n2->setDistanceToFather(d); } else { // Unrooted it++; size_t i3 = it->first; Node* n3 = it->second; double d1 = std::max(dists(i1, i2) + dists(i1, i3) - dists(i2, i3), MIN_BRANCH_LENGTH); double d2 = std::max(dists(i2, i1) + dists(i2, i3) - dists(i1, i3), MIN_BRANCH_LENGTH); double d3 = std::max(dists(i3, i1) + dists(i3, i2) - dists(i1, i2), MIN_BRANCH_LENGTH); root->addSon(n1); root->addSon(n2); root->addSon(n3); n1->setDistanceToFather(d1 / 2.); n2->setDistanceToFather(d2 / 2.); n3->setDistanceToFather(d3 / 2.); } Tree *tree_ = new TreeTemplate<Node>(root); stringstream ss; Newick treeWriter; if (!tree_) throw Exception("The tree is empty"); treeWriter.write(*tree_, ss); delete tree_; string s{ss.str()}; s.erase(s.find_last_not_of(" \n\r\t")+1); return s; }
void SingleLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const { // input MUST have >= 2 elements! if (original_distance.dimensionsize() < 2) { throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element"); } cluster_tree.clear(); if (threshold < 1) { LOG_ERROR << "You tried to use Single Linkage clustering with a threshold. This is currently not supported!" << std::endl; throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); } //SLINK std::vector<Size> pi; pi.reserve(original_distance.dimensionsize()); std::vector<float> lambda; lambda.reserve(original_distance.dimensionsize()); startProgress(0, original_distance.dimensionsize(), "clustering data"); //initialize first pointer values pi.push_back(0); lambda.push_back(std::numeric_limits<float>::max()); for (Size k = 1; k < original_distance.dimensionsize(); ++k) { std::vector<float> row_k; row_k.reserve(k); //initialize pointer values for element to cluster pi.push_back(k); lambda.push_back(std::numeric_limits<float>::max()); // get the right distances for (Size i = 0; i < k; ++i) { row_k.push_back(original_distance.getValue(i, k)); } //calculate pointer values for element k for (Size i = 0; i < k; ++i) { if (lambda[i] >= row_k[i]) { row_k[pi[i]] = std::min(row_k[pi[i]], lambda[i]); lambda[i] = row_k[i]; pi[i] = k; } else { row_k[pi[i]] = std::min(row_k[pi[i]], row_k[i]); } } //update clustering if necessary for (Size i = 0; i < k; ++i) { if (lambda[i] >= lambda[pi[i]]) { pi[i] = k; } } setProgress(k); } for (Size i = 0; i < pi.size() - 1; ++i) { //strict order is always kept in algorithm: i < pi[i] cluster_tree.push_back(BinaryTreeNode(i, pi[i], lambda[i])); //~ std::cout << i << '\n' << pi[i] << '\n' << lambda[i] << std::endl; } //sort pre-tree std::sort(cluster_tree.begin(), cluster_tree.end(), compareBinaryTreeNode); // convert -pre-tree to correct format for (Size i = 0; i < cluster_tree.size(); ++i) { if (cluster_tree[i].right_child < cluster_tree[i].left_child) { std::swap(cluster_tree[i].left_child, cluster_tree[i].right_child); } for (Size k = i + 1; k < cluster_tree.size(); ++k) { if (cluster_tree[k].left_child == cluster_tree[i].right_child) { cluster_tree[k].left_child = cluster_tree[i].left_child; } else if (cluster_tree[k].left_child > cluster_tree[i].right_child) { --cluster_tree[k].left_child; } if (cluster_tree[k].right_child == cluster_tree[i].right_child) { cluster_tree[k].right_child = cluster_tree[i].left_child; } else if (cluster_tree[k].right_child > cluster_tree[i].right_child) { --cluster_tree[k].right_child; } } } //~ prepare to redo clustering to get all indices for binarytree in min index element representation std::vector<std::set<Size> > clusters(original_distance.dimensionsize()); for (Size i = 0; i < original_distance.dimensionsize(); ++i) { clusters[i].insert(i); } for (Size cluster_step = 0; cluster_step < cluster_tree.size(); ++cluster_step) { Size new_left_child = *(clusters[cluster_tree[cluster_step].left_child].begin()); Size new_right_child = *(clusters[cluster_tree[cluster_step].right_child].begin()); clusters[cluster_tree[cluster_step].left_child].insert(clusters[cluster_tree[cluster_step].right_child].begin(), clusters[cluster_tree[cluster_step].right_child].end()); clusters.erase(clusters.begin() + cluster_tree[cluster_step].right_child); std::swap(cluster_tree[cluster_step].left_child, new_left_child); std::swap(cluster_tree[cluster_step].right_child, new_right_child); if (cluster_tree[cluster_step].left_child > cluster_tree[cluster_step].right_child) { std::swap(cluster_tree[cluster_step].left_child, cluster_tree[cluster_step].right_child); } } endProgress(); }
void AverageLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const { // input MUST have >= 2 elements! if (original_distance.dimensionsize() < 2) { throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element"); } std::vector<std::set<Size> > clusters(original_distance.dimensionsize()); for (Size i = 0; i < original_distance.dimensionsize(); ++i) { clusters[i].insert(i); } cluster_tree.clear(); cluster_tree.reserve(original_distance.dimensionsize() - 1); // Initial minimum-distance pair original_distance.updateMinElement(); std::pair<Size, Size> min = original_distance.getMinElementCoordinates(); Size overall_cluster_steps(original_distance.dimensionsize()); startProgress(0, original_distance.dimensionsize(), "clustering data"); while (original_distance(min.second, min.first) < threshold) { //grow the tree cluster_tree.push_back(BinaryTreeNode(*(clusters[min.second].begin()), *(clusters[min.first].begin()), original_distance(min.first, min.second))); if (cluster_tree.back().left_child > cluster_tree.back().right_child) { std::swap(cluster_tree.back().left_child, cluster_tree.back().right_child); } if (original_distance.dimensionsize() > 2) { //pick minimum-distance pair i,j and merge them //calculate parameter for lance-williams formula float alpha_i = (float)(clusters[min.first].size() / (float)(clusters[min.first].size() + clusters[min.second].size())); float alpha_j = (float)(clusters[min.second].size() / (float)(clusters[min.first].size() + clusters[min.second].size())); //~ std::cout << alpha_i << '\t' << alpha_j << std::endl; //pushback elements of second to first (and then erase second) clusters[min.second].insert(clusters[min.first].begin(), clusters[min.first].end()); // erase first one clusters.erase(clusters.begin() + min.first); //update original_distance matrix //average linkage: new distance between clusters is the minimum distance between elements of each cluster //lance-williams update for d((i,j),k): (m_i/m_i+m_j)* d(i,k) + (m_j/m_i+m_j)* d(j,k) ; m_x is the number of elements in cluster x for (Size k = 0; k < min.second; ++k) { float dik = original_distance.getValue(min.first, k); float djk = original_distance.getValue(min.second, k); original_distance.setValueQuick(min.second, k, (alpha_i * dik + alpha_j * djk)); } for (Size k = min.second + 1; k < original_distance.dimensionsize(); ++k) { float dik = original_distance.getValue(min.first, k); float djk = original_distance.getValue(min.second, k); original_distance.setValueQuick(k, min.second, (alpha_i * dik + alpha_j * djk)); } //reduce original_distance.reduce(min.first); //update minimum-distance pair original_distance.updateMinElement(); //get min-pair from triangular matrix min = original_distance.getMinElementCoordinates(); } else { break; } setProgress(overall_cluster_steps - original_distance.dimensionsize()); //repeat until only two cluster remains, last step skips matrix operations } //fill tree with dummy nodes Size sad(*clusters.front().begin()); for (Size i = 1; (i < clusters.size()) && (cluster_tree.size() < cluster_tree.capacity()); ++i) { cluster_tree.push_back(BinaryTreeNode(sad, *clusters[i].begin(), -1.0)); } endProgress(); }
DistanceMatrix *Johnson(dgraph *g) { assert(g); unsigned int v_id = g->add_vertex(); unsigned int it = 0; for (it = 0; it < g->vsize(); it++) { if (it == v_id) continue; // Adding edge from newly created vertex to all // existing vertices with edge cost 0. g->add_edge(v_id, it, 0); } if (!BellmanFord(g, g->get_vertex(v_id))) { // Return an invalid matrix if Bellman Ford algorithm returns false. // This means there exits a negetive weight cycle. return (new DistanceMatrix(0)); } // If Bellman Ford returns true then d-values of all the vertices contain // shortest distances to all vertices from newly created vertex (v_id). int *h = (int*) calloc(g->vsize(), sizeof(int)); assert(h); for (it = 0; it < g->vsize(); it++) { assert(g->get_vertex(it)); h[it] = g->get_vertex(it)->d; } // Reassign the weights of the edges so that they are non negetive. // w'(u,v) = w(u,v) + h(u) - h(v) for (it = 0; it < g->vsize(); it++) { vertex *u = g->get_vertex(it); assert(u); unsigned int is = 0; for (is = 0; is < u->edges.size(); is++) { assert(u->edges[is]); unsigned int v_index = u->edges[is]->ends[DESTINATION]; vertex *v = g->get_vertex(v_index); assert(v); int new_cost = u->edges[is]->cost + h[it] - h[v_index]; u->edges[is]->cost = new_cost; //g->set_edge_cost(it, u->edges[is]->ends[DESTINATION], new_cost); } } DistanceMatrix *D = new DistanceMatrix(g->vsize()); for (it = 0; it < g->vsize(); it++) { vertex *u = g->get_vertex(it); Dijkstra(g, g->get_vertex(it)); unsigned int is = 0; for (is = 0; is < g->vsize(); is++) { vertex *v = g->get_vertex(is); assert(v); D->SetDistance(it, is, v->d + h[is] - h[it]); } } // Revert back the original edge weights. for (it = 0; it < g->vsize(); it++) { vertex *u = g->get_vertex(it); assert(u); unsigned int is = 0; for (is = 0; is < u->edges.size(); is++) { assert(u->edges[is]); unsigned int v_index = u->edges[is]->ends[DESTINATION]; vertex *v = g->get_vertex(v_index); assert(v); int old_cost = u->edges[is]->cost + h[v_index] - h[it]; u->edges[is]->cost = old_cost; } } //Remove the extra vertex. return D; }
void CompleteLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const { // attention: clustering process is done by clustering the indices // pointing to elements in inputvector and distances in inputmatrix // input MUST have >= 2 elements! if (original_distance.dimensionsize() < 2) { throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element"); } std::vector<std::set<Size> > clusters(original_distance.dimensionsize()); for (Size i = 0; i < original_distance.dimensionsize(); ++i) { clusters[i].insert(i); } cluster_tree.clear(); cluster_tree.reserve(original_distance.dimensionsize() - 1); // Initial minimum-distance pair original_distance.updateMinElement(); std::pair<Size, Size> min = original_distance.getMinElementCoordinates(); Size overall_cluster_steps(original_distance.dimensionsize()); startProgress(0, original_distance.dimensionsize(), "clustering data"); while (original_distance(min.first, min.second) < threshold) { //grow the tree cluster_tree.push_back(BinaryTreeNode(*(clusters[min.second].begin()), *(clusters[min.first].begin()), original_distance(min.first, min.second))); if (cluster_tree.back().left_child > cluster_tree.back().right_child) { std::swap(cluster_tree.back().left_child, cluster_tree.back().right_child); } if (original_distance.dimensionsize() > 2) { //pick minimum-distance pair i,j and merge them //pushback elements of second to first (and then erase second) clusters[min.second].insert(clusters[min.first].begin(), clusters[min.first].end()); // erase first one clusters.erase(clusters.begin() + min.first); //update original_distance matrix //complete linkage: new distance between clusters is the minimum distance between elements of each cluster //lance-williams update for d((i,j),k): 0.5* d(i,k) + 0.5* d(j,k) + 0.5* |d(i,k)-d(j,k)| for (Size k = 0; k < min.second; ++k) { float dik = original_distance.getValue(min.first, k); float djk = original_distance.getValue(min.second, k); original_distance.setValueQuick(min.second, k, (0.5f * dik + 0.5f * djk + 0.5f * std::fabs(dik - djk))); } for (Size k = min.second + 1; k < original_distance.dimensionsize(); ++k) { float dik = original_distance.getValue(min.first, k); float djk = original_distance.getValue(min.second, k); original_distance.setValueQuick(k, min.second, (0.5f * dik + 0.5f * djk + 0.5f * std::fabs(dik - djk))); } //reduce original_distance.reduce(min.first); //update minimum-distance pair original_distance.updateMinElement(); //get new min-pair min = original_distance.getMinElementCoordinates(); } else { break; } setProgress(overall_cluster_steps - original_distance.dimensionsize()); //repeat until only two cluster remains or threshold exceeded, last step skips matrix operations } //fill tree with dummy nodes Size sad(*clusters.front().begin()); for (Size i = 1; i < clusters.size() && (cluster_tree.size() < cluster_tree.capacity()); ++i) { cluster_tree.push_back(BinaryTreeNode(sad, *clusters[i].begin(), -1.0)); } //~ while(cluster_tree.size() < cluster_tree.capacity()) //~ { //~ cluster_tree.push_back(BinaryTreeNode(0,1,-1.0)); //~ } endProgress(); }