void testUpdateDistanceMatrix() 
	{

        PeakListCollection PLC = SamplePeakListCollection();

        DistanceMatrix DM = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.);
        DistanceMatrix Ref = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.);

        DM.addElement(99.); //Add a dummy element, we will delete it again

        unsigned int merged_lower = 4; //last two elements
        unsigned int merged_upper = 5;

        PLC.plContent_.clear();
        PLC.plContent_.resize(6,1);

        PLC.updateDistanceMatrix_(DM, merged_lower, merged_upper, 200., 0.5, 0, 0.);

        //Size should still be 5
        shouldEqual((int)DM.size(),5);

        //Matrix should be the same as the original matrix Ref
        for(unsigned int i = 0; i < 5; i++){
            for(unsigned int j = 0; j < i; j++){
                shouldEqualTolerance(DM(i,j),Ref(i,j),TOL);
            }
        }

        return;
    }
Example #2
0
/**
 * Deprecated entry point for midpoint rooting.
 *
 * The first statement unconditionally throws an Exception whose message
 * points callers to TreeTemplateTools::midRoot, so every statement after
 * the throw is unreachable. The dead body is kept as a reference of the
 * former algorithm.
 *
 * @param tree Tree that the (unreachable) legacy code would re-root.
 * @throws Exception always.
 */
void TreeTools::midpointRooting(Tree& tree)
{
    throw Exception("TreeTools::midpointRooting(Tree). This function is deprecated, use TreeTemplateTools::midRoot instead!");
    // ---- Unreachable legacy implementation below ----
    // Work on an unrooted tree.
    if (tree.isRooted())
        tree.unroot();
    // Locate the pair of leaves with the largest pairwise distance.
    DistanceMatrix* dist = getDistanceMatrix(tree);
    vector<size_t> pos = MatrixTools::whichMax(dist->asMatrix());
    // Half of that largest distance is where the new root should sit.
    double dmid = (*dist)(pos[0], pos[1]) / 2;
    int id1 = tree.getLeafId(dist->getName(pos[0]));
    int id2 = tree.getLeafId(dist->getName(pos[1]));
    int rootId = tree.getRootId();
    double d1 = getDistanceBetweenAnyTwoNodes(tree, id1, rootId);
    double d2 = getDistanceBetweenAnyTwoNodes(tree, id2, rootId);
    // Start from whichever of the two leaves is farther from the current root.
    int current = d2 > d1 ? id2 : id1;
    delete dist;
    // Climb towards the root, accumulating branch lengths in c, until the
    // accumulated length reaches dmid.
    double l = tree.getDistanceToFather(current);
    double c = l;
    while (c < dmid)
    {
        current = tree.getFatherId(current);
        l = tree.getDistanceToFather(current);
        c += l;
    }
    // Re-root on the branch above 'current' and split its length l between
    // the two sons of the new root so that 'current' keeps l - (c - dmid).
    tree.newOutGroup(current);
    int brother = tree.getSonsId(tree.getRootId())[1];
    if (brother == current)
        brother = tree.getSonsId(tree.getRootId())[0];
    tree.setDistanceToFather(current, l - (c - dmid));
    tree.setDistanceToFather(brother, c - dmid);
}
Example #3
0
    /**
     * Merge all PeakLists held in c_ pairwise, always picking the pair with
     * the smallest entry in the distance matrix, until one list is left.
     *
     * Side effects: empty PeakLists are removed from c_, c_ is passed to
     * mergePeakLists_() for in-place merging, and correspondenceMap_ and
     * plContent_ are re-initialised.
     *
     * @param drt, dmz, dz, dint Forwarded unchanged to buildDistanceMatrix_(),
     *        mergePeakLists_() and updateDistanceMatrix_().
     * @return The result of calculateRtCorrespondencesFromCorrespondenceMap_()
     *         for the number of non-empty input PeakLists.
     */
    DBL_MATRIX PeakListCollection::mergeAll(double drt, double dmz, double dz, double dint)
    {
        //preprocessing: delete empty PeakLists from c_
        //The index only advances when nothing was erased; erasing shifts the
        //next element into slot i, and stepping past it would leave every
        //second one of a run of empty PeakLists behind.
        for(unsigned int i = 0; i < c_.size(); ){
            if(c_[i].size() == 0){
                c_.erase(c_.begin() + i);
            }else{
                ++i;
            }
        }

        unsigned int oldSize = c_.size();

        // initialization

        //fill correspondenceMap with one origin record per original peak
        correspondenceMap_.clear();
        correspondenceMap_.resize(oldSize);

        for(unsigned int i = 0; i < oldSize; i++){
            correspondenceMap_[i].resize(c_[i].size());
            for(unsigned int j = 0; j < c_[i].size(); j++){
                //create map_item to write to correspondenceMap
                map_item tempItem;
                //origin information: rt, mz, intensity and the PeakList/peak indices
                originInformation o;
                o.rt = c_[i][j].getRt();
                o.mz = c_[i][j].getMz();
                o.intensity = c_[i][j].getAbundance();
                o.originPeakList = i;
                o.originPeak = j;
                tempItem.push_back( o );
                correspondenceMap_[i][j] = tempItem;
            }
        }

        //PeakLists are not merged in the beginning --> fill with 1
        plContent_.clear();
        plContent_.resize(oldSize,1);

        //build distance matrix
        DistanceMatrix D = buildDistanceMatrix_(drt,dmz,dz,dint);

        //merge, until there is only one PeakList left

        while(D.size() > 1){
            //find cheapest assignment
            unsigned int merge_lower = D.getMin_LowerIndex();
            unsigned int merge_upper = D.getMin_HigherIndex();

            //merge peaklists into a new one
            mergePeakLists_(merge_lower, merge_upper, c_, drt, dmz, dz, dint);

            //update distance matrix
            updateDistanceMatrix_(D, merge_lower, merge_upper, drt, dmz, dz, dint);
        }

        return calculateRtCorrespondencesFromCorrespondenceMap_(oldSize);

    }
// Compute a minimum-cost 1:1 assignment of rows to columns for a non-empty square matrix. The returned vector V
// satisfies V[i] = j, meaning row i is assigned to column j. Without dlib support this function always throws.
static std::vector<long>
findMinimumAssignment(const DistanceMatrix &matrix) {
#ifdef ROSE_HAVE_DLIB
    ASSERT_forbid(matrix.size() == 0);
    ASSERT_require(matrix.nr() == matrix.nc());

    // A constant matrix makes every assignment equally good, so the identity mapping is returned without running the
    // O(n^3) Kuhn-Munkres algorithm.
    double lowest, highest;
    dlib::find_min_and_max(matrix, lowest /*out*/, highest /*out*/);
    if (lowest == highest) {
        std::vector<long> identity(matrix.nr());
        for (long row = 0; row < matrix.nr(); ++row)
            identity[row] = row;
        return identity;
    }

    // Dlib's Kuhn-Munkres maximizes over integers. Costs are therefore shifted to start at zero, scaled onto the
    // integer interval [-scale, 0] and negated, so that maximizing the scaled values minimizes the original costs.
    // The interval is wide enough to keep some precision without risking overflow.
    const int scale = 1000000;                          // arbitrary upper bound for integer interval
    dlib::matrix<long> scaled(matrix.nr(), matrix.nc());
    for (long row = 0; row < matrix.nr(); ++row) {
        for (long col = 0; col < matrix.nc(); ++col) {
            const double shifted = matrix(row, col) - lowest;
            scaled(row, col) = round(-scale * shifted / (highest - lowest));
        }
    }
    return dlib::max_cost_assignment(scaled);
#else
    throw FunctionSimilarity::Exception("dlib support is necessary for FunctionSimilarity analysis"
                                        "; see ROSE installation instructions");
#endif
}
// Given a square matrix and a 1:1 mapping from rows to columns, return the total cost of the mapping, i.e. the sum of
// matrix(i, assignment[i]) over all rows i. The assignment is taken by const reference so that it is not copied on
// every call, and each entry is asserted to be a valid (non-negative, in-range) column index before it is used.
static double
totalAssignmentCost(const DistanceMatrix &matrix, const std::vector<long> &assignment) {
    ASSERT_require(matrix.nr() == matrix.nc());
    ASSERT_require((size_t)matrix.nr() == assignment.size());
    double sum = 0.0;
    for (long i=0; i<matrix.nr(); ++i) {
        ASSERT_require(assignment[i] >= 0);
        ASSERT_require(assignment[i] < matrix.nc());
        sum += matrix(i, assignment[i]);
    }
    return sum;
}
Example #6
0
int main(int argc, char **argv)
{
    welcome();
    defineverbose();

    filenames.clear();

    ClustalWInitializers();
    clustalw::ClustalWResources *resources = clustalw::ClustalWResources::Instance();
    resources->setPathToExecutable(string(argv[0]));
    setUserParameters();

    InputFile start;// object reads from an input file and creates a new file
    start.run();

	string offendingSeq;
	Clustal* clustalObj;
	clustalObj = new clustalw::Clustal();
	int u = clustalObj->sequenceInput(false, &offendingSeq);
	string phylipName;
	clustalObj->align(&phylipName);

    AminoAcidFrequency Table;
    Table.openFile("TEST-OutputFile.aln");

    vector<ProteinSequence> p1;
    p1 = start.getProteinData();
    Table.generateAminoAcidTables(p1);

    p1.clear();
    p1 = Table.getFinalSeqs();

    DistanceMatrix dm;
    dm.createDistanceTableCodes(p1);
    dm.createSimilarityMatrixCodes(p1);
    dm.createDistanceTableColours(p1);
    dm.createSimilarityMatrixColours(p1);

    VerticalPosition vp;
    vp.run(p1);

    cout<< "\nPROCESS COMPLETED!\nThe following files were created:\n" << endl;
    for ( size_t i = 0; i < filenames.size(); i++ )
    {
        cout << "\t" << i+1 << ". " << filenames[i] << endl;
    }
    return 0;
}
    void testBuildDistanceMatrix() 
	{

        PeakListCollection PLC = SamplePeakListCollection();

        DistanceMatrix DM = PLC.buildDistanceMatrix_(200., 0.5, 0, 0.);

        //PeakList 4 should have no Peaks in common with the other lists --> Cost == DBL_MAX
        for(unsigned int i = 0; i < DM.size()-1; i++){
            shouldEqual(DM(i,4),DBL_MAX);
        }

        //distance PL0 - PL2 should be sqrt(0.1^2/0.5^2 + 30^2/200^2) = 0.25
        shouldEqualTolerance(DM(0,2),0.25,TOL);
        shouldEqualTolerance(DM(2,0),0.25,TOL);

        return;
    }
Example #8
0
    //Create the pairwise distance matrix of all PeakLists in c_.
    //One element is added per PeakList; its distance to every earlier
    //PeakList is the StableMarriage cost of the pair (limit set to 1).
    DistanceMatrix PeakListCollection::buildDistanceMatrix_(double drt, double dmz, double dz, double dint = 0.)
    {
        const unsigned int listCount = c_.size();

        DistanceMatrix distances;
        for(unsigned int current = 0; current < listCount; ++current){
            //grow the matrix by one element for this PeakList
            distances.addElement();

            //cost of merging it with each PeakList that is already present
            for(unsigned int earlier = 0; earlier < current; ++earlier){
                StableMarriage matching(c_[current],c_[earlier],drt,dmz,dz,dint);
                matching.setLimit(1.);
                distances(current,earlier) = matching.getCost();
            }
        }

        return distances;
    }
Example #9
0
    //Bring the distance matrix D up to date after two PeakLists were merged:
    //the two merged elements are removed, one new element is appended, and
    //its distances to all remaining elements are computed from c_.
    void PeakListCollection::updateDistanceMatrix_(DistanceMatrix& D, unsigned int merged_lower, unsigned int merged_upper,
                                                   double drt, double dmz, double dz, double dint = 0.)
    {
        //drop the higher index first, then the lower one
        D.deleteElement(merged_upper);
        D.deleteElement(merged_lower);

        //new element for the merged PeakList
        D.addElement();

        //distance of every other element to the new (last) one
        unsigned int sz = D.size();
        const unsigned int newest = sz-1;
        for(unsigned int other = 0; other < newest; ++other){
            StableMarriage matching(c_[other],c_[newest],drt,dmz,dz,dint);
            matching.setLimit(1.);
            D(other,newest) = matching.getCost();
        }
    }
Example #10
0
    /**
     * Align all PeakLists by repeatedly merging pairs of a working copy of
     * c_ until a single list remains; c_ itself is not modified.
     *
     * Merge order by flag:
     *  - NORMAL:    the pair with the smallest distance-matrix entry.
     *  - FAST:      always the last two lists of the working copy.
     *  - REFERENCE: list 0 is first merged with list `param` (with list 1
     *               when param == 0), afterwards always the last two lists.
     *
     * This relies on mergePeakLists_() removing both inputs from the vector
     * it is given and appending the merged list at the end (the FAST and
     * REFERENCE termination tests already depended on that) - TODO confirm.
     *
     * Side effects: correspondenceMap_ and plContent_ are re-initialised.
     *
     * @param drt, dmz, dz, dint Forwarded unchanged to buildDistanceMatrix_(),
     *        mergePeakLists_() and StableMarriage.
     * @param flag  Merge strategy, see above.
     * @param param Only read for REFERENCE. Not range-checked here; the
     *        caller must pass a valid PeakList index.
     * @return The result of calculateRtCorrespondencesFromCorrespondenceMap_()
     *         for the original number of PeakLists.
     */
    DBL_MATRIX PeakListCollection::getAlignment(double drt, double dmz, double dz, double dint, AccelerationFlag flag = NORMAL, int param = 0)
    {

        //Create a copy of c_ to work on
        std::vector<PeakList> pls = c_;

        unsigned int oldSize = pls.size();

        // initialization

        //fill correspondenceMap with one origin record per original peak
        correspondenceMap_.clear();
        correspondenceMap_.resize(oldSize);

        for(unsigned int i = 0; i < oldSize; i++){
            correspondenceMap_[i].resize(pls[i].size());
            for(unsigned int j = 0; j < pls[i].size(); j++){
                //create map_item to write to correspondenceMap
                map_item tempItem;
                //origin information: rt, mz, intensity and the PeakList/peak indices
                originInformation o;
                o.rt = pls[i][j].getRt();
                o.mz = pls[i][j].getMz();
                o.intensity = pls[i][j].getAbundance();
                o.originPeakList = i;
                o.originPeak = j;
                tempItem.push_back( o );
                correspondenceMap_[i][j] = tempItem;
            }
        }

        //PeakLists are not merged in the beginning --> fill with 1
        plContent_.clear();
        plContent_.resize(oldSize,1);

        DistanceMatrix D;
        if(flag == NORMAL){
            //build distance matrix (pls is still identical to c_ here)
            D = buildDistanceMatrix_(drt,dmz,dz,dint);
        }

        //A merge needs at least two lists; with fewer there is nothing to do.
        if(flag == REFERENCE && pls.size() > 1){
            //first, merge PeakList n with one other --> last PeakList contains PeakList n
            unsigned int merge_lower = 0;
            unsigned int merge_upper = (param == 0) ? 1 : param;

            //merge peaklists into a new one
            mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint);
        }

        //merge, until there is only one PeakList left
        //(testing before each merge avoids the unsigned underflow of
        //pls.size()-2 when fewer than two lists remain)
        while(pls.size() > 1){
            //FAST and REFERENCE: select last two PeakLists
            unsigned int merge_lower = pls.size()-2;
            unsigned int merge_upper = pls.size()-1;
            if(flag == NORMAL){
                //find cheapest assignment
                merge_lower = D.getMin_LowerIndex();
                merge_upper = D.getMin_HigherIndex();
            }

            //merge peaklists into a new one
            mergePeakLists_(merge_lower, merge_upper, pls, drt, dmz, dz, dint);

            if(flag == NORMAL){
                //Update the distance matrix from the working copy.
                //updateDistanceMatrix_() cannot be used here: it reads c_,
                //which still holds the unmerged lists.
                D.deleteElement(merge_upper);
                D.deleteElement(merge_lower);
                D.addElement();

                unsigned int sz = D.size();
                for(unsigned int i = 0; i < sz-1; i++){
                    StableMarriage sm(pls[i],pls[sz-1],drt,dmz,dz,dint);
                    sm.setLimit(1.);
                    D(i,sz-1) = sm.getCost();
                }
            }
        }

        DBL_MATRIX rtc = calculateRtCorrespondencesFromCorrespondenceMap_(oldSize);

        return rtc;

    }
Example #11
0
/**
 * Compare every MatrixWindow of this matrix (listMW) with every MatrixWindow
 * of _distMat and report the best (smallest) likeness found.
 *
 * A candidate only replaces the current best when it is smaller by more than
 * 0.001. Each accepted candidate is echoed to cout ("New likeness: ...").
 *
 * @param _distMat Matrix whose windows are compared against this one's.
 * @param choice   Selects the MatrixWindow compare method (standard, diag,
 *                 doubleDiag, minDist, minDistRow, minDistCol). Any other
 *                 value prints an error and terminates via exit(333).
 * @return Pair of (indices {i, j} of the winning windows in listMW and in
 *         _distMat's window list, best likeness). If no candidate is
 *         accepted the result is ({0, 0}, MslTools::doubleMax).
 */
pair<vector<int>, double>  DistanceMatrix::compareAllWindows(DistanceMatrix &_distMat, int choice){
    vector<MatrixWindow*> listOther = _distMat.getMatrixWindows();
    int length = listMW.size();
    int lengthOther = listOther.size();
    vector<int> minCoord(2, 0);
    double minLikeness = MslTools::doubleMax;

    for (int i = 0; i < length; i++){//loops through listMW to get comparee

        MatrixWindow *win1 = listMW[i];
        for (int j = 0; j < lengthOther; j++){//loops through listOther to get comparor

            MatrixWindow *win2 = listOther[j];

            double likeness = 0.0;
            //decides which compare method from MatrixWindow to call
            switch(choice){
                case standard:
                    likeness = win1->compare(*win2);
                    break;
                case diag:
                    likeness = win1->compareDiagonal(*win2);
                    break;
                case doubleDiag:
                    likeness = win1->compareDoubleDiagonal(*win2);
                    break;
                case minDist:
                    likeness = win1->compareMinDist(*win2);
                    break;
                case minDistRow:
                    likeness = win1->compareMinRow(*win2);
                    break;
                case minDistCol:
                    likeness = win1->compareMinCol(*win2);
                    break;
                default:
                    cout<<"Invalid argument in function DistanceMatrix::compareAll()."<<endl;
                    exit(333);
            }//end switch

            // "smaller by more than 0.001" written as a plain subtraction:
            // an unqualified abs() on a double can resolve to the integer
            // overload and silently truncate the difference.
            if ((minLikeness - likeness) > 0.001){
                cout << "New likeness: "<<likeness<<endl;
                minLikeness = likeness;
                minCoord[0] = i;
                minCoord[1] = j;
            }//endif
        }//end for on j
    }//end for on i

    pair<vector<int>, double> coordsAndLikeness(minCoord, minLikeness);

    return coordsAndLikeness;

}
Example #12
0
//must add segID
//
// Print one summary line for a comparison result produced against _distMat.
// _result holds the indices of the winning MatrixWindows (first: index into
// this matrix's listMW, second: index into _distMat's windows) and their
// likeness. choice selects the label of the line; an unknown value prints an
// error message and terminates via exit(334).
void DistanceMatrix::printCompareInfo(DistanceMatrix &_distMat, pair<vector<int>, double> _result, int choice){

    // Unpack the result pair.
    const vector<int> &winningIdx = _result.first;
    const double bestLikeness = _result.second;

    // Look up the two winning windows.
    vector<MatrixWindow*> otherWindows = _distMat.getMatrixWindows();
    MatrixWindow *ownWindow = listMW[winningIdx[0]];
    MatrixWindow *otherWindow = otherWindows[winningIdx[1]];

    // Left row/column of each window, used as indices into the atom vectors.
    int i1 = ownWindow->getLeftR();
    int j1 = ownWindow->getLeftC();
    int i2 = otherWindow->getLeftR();
    int j2 = otherWindow->getLeftC();

    // Identify the four atoms by segment ID ...
    string i1ID = atomVec[i1]->getSegID().c_str();
    string j1ID = atomVec[j1]->getSegID().c_str();
    string i2ID = _distMat.getAtomVector()[i2]->getSegID().c_str();
    string j2ID = _distMat.getAtomVector()[j2]->getSegID().c_str();

    // ... falling back to chain IDs for all four as soon as one segment ID is empty.
    const bool anySegIdEmpty = (i1ID=="" || j1ID=="" || i2ID=="" || j2ID=="");
    if(anySegIdEmpty){
        i1ID = atomVec[i1]->getChainId().c_str();
        j1ID = atomVec[j1]->getChainId().c_str();
        i2ID = _distMat.getAtomVector()[i2]->getChainId().c_str();
        j2ID = _distMat.getAtomVector()[j2]->getChainId().c_str();
    }

    int i1res = atomVec[i1]->getResidueNumber();
    int j1res = atomVec[j1]->getResidueNumber();
    int i2res = _distMat.getAtomVector()[i2]->getResidueNumber();
    int j2res = _distMat.getAtomVector()[j2]->getResidueNumber();

    // File names of both PDBs, cut to at most 17 characters.
    string ownName = getFileName(PDBid);
    string ownNameShort = ownName.substr(0,17);

    string otherName = getFileName(_distMat.getPDBid());
    string otherNameShort = otherName.substr(0,17);

    cout<<"Comparing PDBs "<<ownNameShort<<", "<<otherNameShort<<endl;

    // Every comparison type prints the same thirteen values; only the format
    // string (i.e. the leading label) differs, so it is selected first and
    // printed once below.
    const char *lineFormat = NULL;
    switch(choice){
        case standard:
            lineFormat = "Standard compare:\t\tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n";
            break;
        case diag:
            lineFormat = "Diagonal compare: \t\tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n";
            break;
        case doubleDiag:
            lineFormat = "Double Diagonal compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n";
            break;
        case minDist:
            lineFormat = "Minimum Distance compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n";
            break;
        case minDistRow:
            lineFormat = "Minimum Distance Row compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n";
            break;
        case minDistCol:
            lineFormat = "Minimum Distance Column compare: \tWindow1 %3d,%3d (Residues: %1s%3d, %1s%3d)\tWindow2 %3d,%3d (Residues: %1s%3d, %1s%3d)\t%8.3f\n";
            break;
        default:
            cout<<"Error. Incorrect int value (choice) in DistanceMatrix::printCompareInfo(...)"<<endl;
            exit(334);
    }//end switch

    fprintf(stdout, lineFormat, i1, j1, i1ID.c_str(), i1res, j1ID.c_str(), j1res, i2, j2, i2ID.c_str(), i2res, j2ID.c_str(), j2res, bestLikeness);
}
Example #13
0
/**
 * Run the all-against-all window comparison up to _numCompare times and
 * collect one DistanceMatrixResult (best window pair and its likeness) per
 * round.
 *
 * In each round a window pair is skipped when its intra-matrix ID pair is
 * listed as forbidden, or when its residue-number offset disagrees with a
 * registered offset for the same inter-matrix ID pair. IDs are segment IDs,
 * or chain IDs as soon as one of the four segment IDs is empty. A candidate
 * replaces the current best only when it is smaller by more than 0.001.
 *
 * NOTE(review): the forbidden/register maps are declared inside the round
 * loop and only filled after the scan of that round, so they are empty
 * during every scan and never filter anything. The commented-out
 * declarations at function scope suggest they once persisted across rounds;
 * confirm which behaviour is intended.
 *
 * @param _distMat    Matrix whose windows are compared against this one's.
 * @param choice      Selects the MatrixWindow compare method; an unknown
 *                    value prints an error and terminates via exit(333).
 * @param _numCompare Maximum number of rounds / results.
 * @return The collected results. Fewer than _numCompare entries are returned
 *         when a round finds no acceptable window pair.
 */
vector<DistanceMatrixResult> DistanceMatrix::multiCompareAllWindows(DistanceMatrix &_distMat, int choice, int _numCompare){
/*
    //which chains to skip:
    map<string, bool> forbiddenIDMat1;
    map<string, bool> forbiddenIDMat2;

    // Maintain the proper spacing between residues of different matrix window pairs. 
    map<string,int> properRegisterRow;
    map<string,int> properRegisterCol;
    map<string,int>::iterator findRegistry;
*/
    //get list of MWs to compare
    vector<MatrixWindow*> listOther = _distMat.getMatrixWindows();
    int length = listMW.size();
    int lengthOther = listOther.size();

    //vector of objects to return
    vector<DistanceMatrixResult> returnVec;

    //loop over number of times to compare all
    for(int k=0; k<_numCompare; k++){

        //minimum windows and indices
        MatrixWindow *minWindow1 = NULL;
        MatrixWindow *minWindow2 = NULL;
        double minLikeness = 1000000;

        //IDs to skip in the future
        string i1IDWin;
        string j1IDWin;
        string i2IDWin;
        string j2IDWin;

        //which chains to skip:
        map<string, bool> forbiddenIDMat1;
        map<string, bool> forbiddenIDMat2;

        // Maintain the proper spacing between residues of different matrix window pairs.
        map<string,int> properRegisterRow;
        map<string,int> properRegisterCol;
        map<string,int>::iterator findRegistry;

        for (int i = 0; i < length; i++){//loops through listMW to get compare

            MatrixWindow *win1 = listMW[i];
            for (int j = 0; j < lengthOther; j++){//loops through listOther to get comparor

                MatrixWindow *win2 = listOther[j];

                //get the ID (seg or chain) so we can filter ones we want to skip
                int i1 = win1->getLeftR();
                int j1 = win1->getLeftC();
                int i2 = win2->getLeftR();
                int j2 = win2->getLeftC();

                string i1ID = atomVec[i1]->getSegID();
                string j1ID = atomVec[j1]->getSegID();
                string i2ID = _distMat.getAtomVector()[i2]->getSegID();
                string j2ID = _distMat.getAtomVector()[j2]->getSegID();

                if(i1ID=="" || j1ID=="" || i2ID=="" ||j2ID==""){

                    i1ID = atomVec[i1]->getChainId();
                    j1ID = atomVec[j1]->getChainId();
                    i2ID = _distMat.getAtomVector()[i2]->getChainId();
                    j2ID = _distMat.getAtomVector()[j2]->getChainId();
                }//end if

                // Skip if both chains are forbidden within a matrix
                if (forbiddenIDMat1.find(i1ID+":"+j1ID)!=forbiddenIDMat1.end() || forbiddenIDMat2.find(i2ID+":"+j2ID)!=forbiddenIDMat2.end()) continue;

                // Skip if both inter-matrix segids found and if difference in residue number is not the same.
                findRegistry = properRegisterRow.find(i1ID+":"+i2ID);
                double diffInResidueNumber = atomVec[i1]->getResidueNumber() - _distMat.getAtomVector()[i2]->getResidueNumber();
                if (findRegistry != properRegisterRow.end() && findRegistry->second != diffInResidueNumber) continue;

                findRegistry        = properRegisterCol.find(j1ID+":"+j2ID);
                diffInResidueNumber = atomVec[j1]->getResidueNumber() - _distMat.getAtomVector()[j2]->getResidueNumber();
                if (findRegistry != properRegisterCol.end() && findRegistry->second != diffInResidueNumber) continue;

                double likeness = 0.0;
                //decides which compare method from MatrixWindow to call
                switch(choice){
                    case standard:
                        likeness = win1->compare(*win2);
                        break;
                    case diag:
                        likeness = win1->compareDiagonal(*win2);
                        break;
                    case doubleDiag:
                        likeness = win1->compareDoubleDiagonal(*win2);
                        break;
                    case minDist:
                        likeness = win1->compareMinDist(*win2);
                        break;
                    case minDistRow:
                        likeness = win1->compareMinRow(*win2);
                        break;
                    case minDistCol:
                        likeness = win1->compareMinCol(*win2);
                        break;
                    default:
                        cout<<"Invalid argument in function DistanceMatrix::compareAll()."<<endl;
                        exit(333);
                }//end switch

                // "smaller by more than 0.001" written as a plain subtraction:
                // an unqualified abs() on a double can resolve to the integer
                // overload and silently truncate the difference.
                if ((minLikeness - likeness) > 0.001){
                    minLikeness = likeness;
                    minWindow1 = win1;
                    minWindow2 = win2;

                    //set the ID to avoid in the future
                    i1IDWin = i1ID;
                    j1IDWin = j1ID;
                    i2IDWin = i2ID;
                    j2IDWin = j2ID;
                }//endif
            }//end for on j
        }//end for on i

        // No window pair qualified (empty window list, everything filtered,
        // or no likeness below the initial bound). The winners would be NULL
        // below, and further rounds cannot do better, so stop here.
        if (minWindow1 == NULL || minWindow2 == NULL){
            break;
        }

        // Allowed   inter matrix identifier[SEGID] difference in residue numbers
        vector<int> residueNumbers1 = minWindow1->getUpLeftResidueNumbers();
        vector<int> residueNumbers2 = minWindow2->getUpLeftResidueNumbers();

        properRegisterRow[i1IDWin+":"+i2IDWin] =   (residueNumbers1[0] - residueNumbers2[0]);
        properRegisterRow[i2IDWin+":"+i1IDWin] = - (residueNumbers1[0] - residueNumbers2[0]);
        properRegisterCol[j1IDWin+":"+j2IDWin] =   (residueNumbers1[1] - residueNumbers2[1]);
        properRegisterCol[j2IDWin+":"+j1IDWin] = - (residueNumbers1[1] - residueNumbers2[1]);

        // Forbidden intra matrix identifier[SEGID] pairs
        // (only the presence of the key is tested above, not the stored value)
        forbiddenIDMat1[i1IDWin+":"+j1IDWin] = false;
        forbiddenIDMat1[j1IDWin+":"+i1IDWin] = false;
        forbiddenIDMat2[i2IDWin+":"+j2IDWin] = false;
        forbiddenIDMat2[j2IDWin+":"+i2IDWin] = false;

        DistanceMatrixResult currentResult(*this, *minWindow1, _distMat, *minWindow2, minLikeness);

        returnVec.push_back(currentResult);

    }//end for on k

    return returnVec;

}
Example #14
0
/**
 * Build a tree from a distance matrix estimated by estimationMethod, using
 * reconstructionMethod to turn the matrix into a tree.
 *
 * With param == DISTANCEMETHOD_PAIRWISE the independent model and rate
 * distribution parameters (minus parametersToIgnore) are handed to the
 * estimation method as additional parameters. With
 * param == DISTANCEMETHOD_ITERATIONS the estimate/build cycle is repeated,
 * re-optimising numerical parameters on each tree, until the topology no
 * longer differs from the previous round (Robinson-Foulds distance 0).
 * Any other value stops after the first tree.
 *
 * A matrix of exactly two taxa is handled directly: the only possible tree
 * has one root and two leaves, each at half of the pairwise distance.
 *
 * @param estimationMethod     Computes the distance matrix.
 * @param reconstructionMethod Builds a tree from the matrix.
 * @param parametersToIgnore   Parameters excluded from estimation/optimisation.
 * @param optimizeBrLen        If false, branch-length parameters are excluded
 *                             from the optimisation.
 * @param param                Strategy selector, see above.
 * @param tolerance, tlEvalMax, profiler, messenger
 *                             Forwarded to optimizeNumericalParameters().
 * @param verbose              Verbosity; values > 0 enable progress output.
 * @return The last tree built (NULL only if the loop body never produced one).
 */
TreeTemplate<Node>* OptimizationTools::buildDistanceTree(
  DistanceEstimation& estimationMethod,
  AgglomerativeDistanceMethod& reconstructionMethod,
  const ParameterList& parametersToIgnore,
  bool optimizeBrLen,
  const std::string& param,
  double tolerance,
  unsigned int tlEvalMax,
  OutputStream* profiler,
  OutputStream* messenger,
  unsigned int verbose) throw (Exception)
{
  estimationMethod.resetAdditionalParameters();
  estimationMethod.setVerbose(verbose);
  if (param == DISTANCEMETHOD_PAIRWISE)
  {
    ParameterList tmp = estimationMethod.getSubstitutionModel().getIndependentParameters();
    tmp.addParameters(estimationMethod.getRateDistribution().getIndependentParameters());
    tmp.deleteParameters(parametersToIgnore.getParameterNames());
    estimationMethod.setAdditionalParameters(tmp);
  }
  TreeTemplate<Node>* tree = NULL;
  TreeTemplate<Node>* previousTree = NULL;
  bool test = true;
  while (test)
  {
    // Compute matrice:
    if (verbose > 0)
      ApplicationTools::displayTask("Estimating distance matrix", true);
    estimationMethod.computeMatrix();
    DistanceMatrix* matrix = estimationMethod.getMatrix();
    if (verbose > 0)
      ApplicationTools::displayTaskDone();

    // Compute tree:
    if (matrix->size() == 2) {
      //Special case, there is only one possible tree.
      //Each leaf gets half of the distance between the two taxa, i.e. the
      //off-diagonal entry (0,1); the diagonal entry (0,0) is a taxon's
      //distance to itself.
      const double halfDistance = (*matrix)(0,1) / 2.;
      Node* n1 = new Node(0);
      Node* n2 = new Node(1, matrix->getName(0));
      n2->setDistanceToFather(halfDistance);
      Node* n3 = new Node(2, matrix->getName(1));
      n3->setDistanceToFather(halfDistance);
      n1->addSon(n2);
      n1->addSon(n3);
      // The matrix is released on the regular path below; do the same here
      // before leaving the loop.
      delete matrix;
      tree = new TreeTemplate<Node>(n1);
      break;
    }
    if (verbose > 0)
      ApplicationTools::displayTask("Building tree");
    reconstructionMethod.setDistanceMatrix(*matrix);
    reconstructionMethod.computeTree();
    previousTree = tree;
    delete matrix;
    tree = dynamic_cast<TreeTemplate<Node>*>(reconstructionMethod.getTree());
    if (verbose > 0)
      ApplicationTools::displayTaskDone();
    if (previousTree)
    {
      // The convergence test must not depend on the verbosity level:
      // otherwise a silent run never terminates and leaks one tree per
      // iteration. Only the report is optional.
      int rf = TreeTools::robinsonFouldsDistance(*previousTree, *tree, false);
      if (verbose > 0)
        ApplicationTools::displayResult("Topo. distance with previous iteration", TextTools::toString(rf));
      // Keep iterating only while the topology is still changing.
      test = (rf != 0);
      delete previousTree;
    }
    if (param != DISTANCEMETHOD_ITERATIONS)
      break;  // Ends here.

    // Now, re-estimate parameters:
    auto_ptr<SubstitutionModel> model(estimationMethod.getSubstitutionModel().clone());
    auto_ptr<DiscreteDistribution> rdist(estimationMethod.getRateDistribution().clone());
    DRHomogeneousTreeLikelihood tl(*tree,
        *estimationMethod.getData(),
        model.get(),
        rdist.get(),
        true, verbose > 1);
    tl.initialize();
    ParameterList parameters = tl.getParameters();
    if (!optimizeBrLen)
    {
      vector<string> vs = tl.getBranchLengthsParameters().getParameterNames();
      parameters.deleteParameters(vs);
    }
    parameters.deleteParameters(parametersToIgnore.getParameterNames());
    optimizeNumericalParameters(&tl, parameters, NULL, 0, tolerance, tlEvalMax, messenger, profiler, verbose > 0 ? verbose - 1 : 0);
    if (verbose > 0)
    {
      // Report the optimised model and rate-distribution parameters.
      ParameterList tmp = tl.getSubstitutionModelParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
      tmp = tl.getRateDistributionParameters();
      for (unsigned int i = 0; i < tmp.size(); i++)
      {
        ApplicationTools::displayResult(tmp[i].getName(), TextTools::toString(tmp[i].getValue()));
      }
    }
  }
  return tree;
}
/**
 * Program entry point: builds a DistanceMatrix from the CA atoms of the
 * input PDB, loads a database of precomputed distance matrices from
 * "try.bin", compares the input against every database entry using the
 * requested search criterion, and prints the collected results.
 *
 * Exits with status 111 when the input PDB yields no matrix windows.
 *
 * @return 0 on normal completion.
 */
int main(int argc, char *argv[]){
    
    // Option Parser
    Options opt = setupOptions(argc,argv);

    //create system and dm for first PDB
    PDBReader reader;
    reader.open(opt.inputPDB);
    reader.read();
    reader.close();

    System *constSys = new System(reader.getAtoms());
    DistanceMatrix constDM;
    
    //add CA atoms to the atom vectors
    for (int j=0; j<constSys->residueSize(); j++){
	Residue &tempRes=constSys->getResidue(j);
	if (tempRes.exists("CA")){
	    constDM.addAtom(tempRes("CA"));
	}
    }//end for on j

    
    //fill the DistanceMatrix and set window size
    constDM.setGeneralWinSize(opt.windowSize);
    constDM.createDistanceMatrix();
    constDM.setIntraChain(opt.intraChainCompare);
    constDM.setPDBid(opt.inputPDB);
    constDM.setDebug(opt.debug);

    //create matrix windows
    constDM.createMatrixWindows();

    // NOTE(review): constDM is still used after this delete. This is only
    // safe if addAtom() stores its own copies of the atoms rather than
    // references into constSys - confirm against DistanceMatrix::addAtom.
    delete(constSys);


    if (constDM.getMatrixWindows().size()==0){
	    cout<<"Uh-oh.All the windows got filtered in the PDB you wanted to compare against."<<endl;
	    exit(111);
    }

// COMMMENT OUT BEGINS

/*

    //read in list of PDBs to compare to first PDB
    vector<string> list;
    ifstream fs;

    fs.open(opt.pdbList.c_str());
    if (fs.fail()){
	cerr<<"Cannot open file "<<opt.pdbList<<endl;
	exit(1);
    }

    while(true){
	string line;
	getline(fs, line);

	if(fs.fail()){
	    //no more lines to read, quite the while.
	    break;
	}

	if(line==""){
	    continue;
	}
	list.push_back(line);
    }

    fs.close();


    // List of distance matrices, one for each PDB
    vector<DistanceMatrix> DMVec(list.size());

    // A system object for each PDB
    vector<System*> sysVec(list.size(), NULL);

    // Create DistanceMatrix and System Objects for list of PDBs
    for(int i=0; i<list.size(); i++){

	cout<<i<<"create sys and dm."<<endl;

	PDBReader rAv;
	rAv.open(list[i]);
	rAv.read();
	rAv.close();

	sysVec[i] =new System(rAv.getAtoms());
    
	//add CA atoms to the atom vectors
	for (int j=0; j<sysVec[i]->residueSize(); j++){
	    Residue &tempRes=sysVec[i]->getResidue(j);
	    if (tempRes.exists("CA")){
		//only add CA if it is on a helix
		string segID = tempRes("CA").getSegID();
		//if(segID == "" || segID.at(0) == 'H'){
		    DMVec[i].addAtom(tempRes("CA"));
		    //	}
	    }
	}//end for on j
	//fill the DistanceMatrix and set window size
	DMVec[i].setGeneralWinSize(opt.windowSize);
	DMVec[i].createDistanceMatrix();
	DMVec[i].setIntraChain(opt.intraChainCompare);
	DMVec[i].setPDBid(list[i]);

	//create matrix windows
	DMVec[i].createMatrixWindows();
	
	delete(sysVec[i]);

    }//end for on i
*/

// COMMMENT OUT ENDS

// NEW BEGINS

// load distance matrix database from an external binary file
    DistanceMatrixDatabase dmd;
    dmd.load_checkpoint("try.bin");

// List of distance matrices, one for each PDB
//************* from now on, all DMVec become pointers ****************//
    vector<DistanceMatrix *> &DMVec = dmd.getDistanceMatrixList();
/*
    for (uint i = 0;i < dms.size();i++){
            cout << "DM["<<i<<"]: "<<dms[i]->getPDBid()<<endl;
    }
    cout << "Done"<<endl;
*/

    
    //ManageResults to take care of printing/sorting at end
    ManageDistanceMatrixResults resultManager;

	    
    for(uint i=0; i<DMVec.size(); i++){

	    // NOTE(review): the value printed after "# Residues:" is the number
	    // of matrices in the database, not a residue count - confirm intent.
	    cout<< "Trying "<<DMVec[i]->getPDBid()<<" ("<<i<<") # Residues: "<<DMVec.size()<<" Number of MatrixWindows to compare: "<<DMVec[i]->getMatrixWindows().size();

	    //don't compare if all of the windows got filtered out
	    if (DMVec[i]->getMatrixWindows().size() == 0){
		    cout << " Sorry Zero Matrix Windows !"<<endl;
		    continue;
	    }
	    
	    cout <<endl;

	    vector<DistanceMatrixResult> resultsToAdd;

	    // Dispatch on the search criterion. The criteria are mutually
	    // exclusive; an unrecognised value leaves resultsToAdd empty, so
	    // nothing is added for this entry.
	    if(opt.searchCriteria=="standard"){
		resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::standard, opt.numberOfIterations);
	    }
	    else if(opt.searchCriteria=="diagonal"){
		resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::diag, opt.numberOfIterations);
	    }
	    else if(opt.searchCriteria=="doubleDiagonal"){
		resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::doubleDiag, opt.numberOfIterations);
	    }
	    else if(opt.searchCriteria=="minDistance"){
		resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::minDist, opt.numberOfIterations);
	    }
	    else if(opt.searchCriteria=="minDistanceRow"){
		resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::minDistRow, opt.numberOfIterations);
	    }
	    else if(opt.searchCriteria=="minDistanceCol"){
		// Column counterpart of minDistanceRow; the compare routines
		// support it but it was never dispatched from here.
		resultsToAdd = constDM.multiCompareAllWindows(*DMVec[i], DistanceMatrix::minDistCol, opt.numberOfIterations);
	    }

	    // Keep the whole batch as soon as one result is within tolerance
	    // (or when no tolerance was set).
	    bool addFlag = false;
 	    for (uint j = 0; j< resultsToAdd.size();j++){
 		    if (opt.likenessTolerance == MslTools::doubleMax || resultsToAdd[j].getLikeness() <= opt.likenessTolerance){
			    addFlag = true;
			    break;
 		    }
	    }

	    if (addFlag &&  resultsToAdd.size() > 0){

		    resultManager.addResults(resultsToAdd);	    		    
	    }


    }//end for on i

// NEW ENDS
    
    cout << "Printing"<<endl;
    resultManager.setAlignPdbs(opt.alignPdbs);
//    cout << "hello"<<endl;
    resultManager.setRmsdTol(opt.rmsdTol);
//    cout << "hello again"<<endl;
    resultManager.printResults();

    cout << "Done."<<endl;
    return 0;
}
Example #16
0
/**
 * Builds a tree from a distance matrix and a variance matrix by repeatedly
 * joining the best pair of nodes, and returns the result as Newick text.
 *
 * In every round the pair (i, j) maximising
 *     sumDist[i] + sumDist[j] - (n - 2) * dists(i, j)
 * is joined under a new parent node (n = number of nodes still unjoined).
 * The distance/variance from the merged node to every other node is a
 * lambda-weighted mix of the two joined rows; lambda is derived from the
 * variance matrix and clamped to [0, 1]. (This looks like the BIONJ scheme;
 * confirm against the project documentation.)
 *
 * Both matrices are taken by value and are overwritten as scratch space.
 *
 * @param dists Pairwise distances; dists.getName(i) is used as the label of leaf i.
 * @param vars  Pairwise variances, indexed like dists.
 * @return The tree as written by Newick::write, with trailing whitespace removed.
 * @throws Exception if fewer than two entries are supplied, if vars is smaller
 *         than dists, or if no pair can be selected during a join round.
 */
string Alignment::_computeTree(DistanceMatrix dists, DistanceMatrix vars) throw (Exception) {
    // The final step below unconditionally reads two entries of the node map,
    // so smaller inputs must be rejected before any node is allocated.
    if (dists.size() < 2) throw Exception("Alignment::_computeTree: at least two entries are required to build a tree.");
    // vars is indexed with the same indices as dists.
    if (vars.size() < dists.size()) throw Exception("Alignment::_computeTree: the variance matrix is smaller than the distance matrix.");

    // Initialization: one leaf per matrix entry, keyed by its matrix index.
    std::map<size_t, Node*> currentNodes_;
    std::vector<double> sumDist_(dists.size());
    double lambda_;

    for (size_t i = 0; i < dists.size(); i++) {
        currentNodes_[i] = new Node(static_cast<int>(i), dists.getName(i));
    }
    int idNextNode = static_cast<int>(dists.size());
    vector<double> newDist(dists.size());
    vector<double> newVar(dists.size());

    // Build tree: join pairs until at most three nodes remain.
    while (currentNodes_.size() > 3) {
        // Number of nodes outside the pair under consideration; the map is
        // only modified at the very end of the round, so this is invariant.
        const double others = static_cast<double>(currentNodes_.size() - 2);

        // Row sums over the nodes that are still active.
        for (std::map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); ++i) {
            size_t id = i->first;
            sumDist_[id] = 0;
            for (map<size_t, Node*>::iterator j = currentNodes_.begin(); j != currentNodes_.end(); ++j) {
                size_t jd = j->first;
                sumDist_[id] += dists(id, jd);
            }
        }

        // Get best pair: start from -infinity so any finite criterion wins.
        vector<size_t> bestPair(2);
        const double noCrit = std::log(0.);
        double critMax = noCrit;
        for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); ++i) {
            size_t id = i->first;
            map<size_t, Node*>::iterator j = i;
            ++j;
            for ( ; j != currentNodes_.end(); ++j) {
                size_t jd = j->first;
                double crit = sumDist_[id] + sumDist_[jd] - others * dists(id, jd);
                if (crit > critMax) {
                    critMax = crit;
                    bestPair[0] = id;
                    bestPair[1] = jd;
                }
            }
        }
        if (critMax == noCrit) throw Exception("Unexpected error: no maximum criterium found.");

        // Get branch lengths for the pair, floored at MIN_BRANCH_LENGTH.
        double ratio = (sumDist_[bestPair[0]] - sumDist_[bestPair[1]]) / others;
        vector<double> d(2);

        d[0] = std::max(.5 * (dists(bestPair[0], bestPair[1]) + ratio), MIN_BRANCH_LENGTH);
        d[1] = std::max(.5 * (dists(bestPair[0], bestPair[1]) - ratio), MIN_BRANCH_LENGTH);

        Node* best1 = currentNodes_[bestPair[0]];
        Node* best2 = currentNodes_[bestPair[1]];

        best1->setDistanceToFather(d[0]);
        best2->setDistanceToFather(d[1]);
        Node* parent = new Node(idNextNode++);
        parent->addSon(best1);
        parent->addSon(best2);

        // Compute lambda, the weight given to the first member of the pair.
        lambda_ = 0;
        if (vars(bestPair[0], bestPair[1]) == 0) lambda_ = .5; // avoids dividing by zero below
        else {
            for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); ++i) {
                size_t id = i->first;
                if (id != bestPair[0] && id != bestPair[1]) lambda_ += (vars(bestPair[1], id) - vars(bestPair[0], id));
            }
            double div = 2 * others * vars(bestPair[0], bestPair[1]);
            lambda_ /= div;
            lambda_ += .5;
        }
        if (lambda_ < 0.) lambda_ = 0.;
        if (lambda_ > 1.) lambda_ = 1.;

        // Distances and variances from the merged node to every other node.
        for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); ++i) {
            size_t id = i->first;
            if (id != bestPair[0] && id != bestPair[1]) {
                newDist[id] = std::max(lambda_ * (dists(bestPair[0], id) - d[0]) + (1 - lambda_) * (dists(bestPair[1], id) - d[1]), 0.);
                newVar[id] = lambda_ * vars(bestPair[0], id) + (1 - lambda_) * vars(bestPair[1], id) - lambda_ * (1 - lambda_) * vars(bestPair[0], bestPair[1]);
            }
            else {
                newDist[id] = 0;
                // Keep the variance row consistent instead of re-using a value
                // left over from an earlier round.
                newVar[id] = 0;
            }
        }

        // Actualize currentNodes_: the parent takes over the slot of the first
        // member of the pair, the second member's slot is retired.
        currentNodes_[bestPair[0]] = parent;
        currentNodes_.erase(bestPair[1]);
        for (map<size_t, Node*>::iterator i = currentNodes_.begin(); i != currentNodes_.end(); ++i) {
            size_t id = i->first;
            dists(bestPair[0], id) = dists(id, bestPair[0]) = newDist[id];
            vars(bestPair[0], id) =  vars(id, bestPair[0]) = newVar[id];
        }
    }

    // Final step: attach the remaining two or three nodes to the root.
    Node* root = new Node(idNextNode);
    map<size_t, Node* >::iterator it = currentNodes_.begin();
    size_t i1 = it->first;
    Node* n1 = it->second;
    ++it;
    size_t i2 = it->first;
    Node* n2 = it->second;
    if (currentNodes_.size() == 2) {
        // Rooted: split the single remaining distance evenly.
        double d = dists(i1, i2) / 2;
        root->addSon(n1);
        root->addSon(n2);
        n1->setDistanceToFather(d);
        n2->setDistanceToFather(d);
    }
    else {
        // Unrooted: three-point formula for the three remaining branches.
        ++it;
        size_t i3 = it->first;
        Node* n3 = it->second;
        double d1 = std::max(dists(i1, i2) + dists(i1, i3) - dists(i2, i3), MIN_BRANCH_LENGTH);
        double d2 = std::max(dists(i2, i1) + dists(i2, i3) - dists(i1, i3), MIN_BRANCH_LENGTH);
        double d3 = std::max(dists(i3, i1) + dists(i3, i2) - dists(i1, i2), MIN_BRANCH_LENGTH);
        root->addSon(n1);
        root->addSon(n2);
        root->addSon(n3);
        n1->setDistanceToFather(d1 / 2.);
        n2->setDistanceToFather(d2 / 2.);
        n3->setDistanceToFather(d3 / 2.);
    }

    // Serialise to Newick. The tree object is released on the error path as
    // well, so a failing writer does not leak it.
    Tree *tree_ = new TreeTemplate<Node>(root);
    stringstream ss;
    Newick treeWriter;
    try {
        treeWriter.write(*tree_, ss);
    }
    catch (...) {
        delete tree_;
        throw;
    }
    delete tree_;

    // Strip trailing whitespace (find_last_not_of returns npos for an
    // all-whitespace string, and npos + 1 == 0 then erases everything).
    string s{ss.str()};
    s.erase(s.find_last_not_of(" \n\r\t")+1);
    return s;
}
Example #17
0
  /**
   * Single-linkage clustering of the elements described by a distance matrix.
   *
   * First a pointer representation (pi, lambda) is computed element by
   * element, then it is turned into a list of merge steps sorted with
   * compareBinaryTreeNode, and finally the node indices are rewritten so that
   * every cluster is referred to by its smallest member.
   *
   * @param original_distance Distance matrix; only read via getValue().
   * @param cluster_tree      Output; cleared, then filled with one node per merge.
   * @param threshold         Values below 1 are rejected (thresholds are not supported).
   * @throws ClusterFunctor::InsufficientInput if the matrix has fewer than 2 elements.
   * @throws Exception::NotImplemented if threshold < 1.
   */
  void SingleLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const
  {
    const Size element_count = original_distance.dimensionsize();

    // at least two elements are needed to form a single merge
    if (element_count < 2)
    {
      throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element");
    }

    cluster_tree.clear();
    if (threshold < 1)
    {
      LOG_ERROR << "You tried to use Single Linkage clustering with a threshold. This is currently not supported!" << std::endl;
      throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
    }

    const float unreached = std::numeric_limits<float>::max();

    // pointer representation: element i joins the cluster of pi[i] at level lambda[i]
    std::vector<Size> pi;
    std::vector<float> lambda;
    pi.reserve(element_count);
    lambda.reserve(element_count);

    startProgress(0, element_count, "clustering data");

    // element 0 starts out pointing at itself
    pi.push_back(0);
    lambda.push_back(unreached);

    for (Size current = 1; current < element_count; ++current)
    {
      std::vector<float> dist_to_current;
      dist_to_current.reserve(current);

      // the new element initially points at itself as well
      pi.push_back(current);
      lambda.push_back(unreached);

      // distances from every earlier element to the new one
      for (Size prev = 0; prev < current; ++prev)
      {
        dist_to_current.push_back(original_distance.getValue(prev, current));
      }

      // re-point earlier elements that are closer to the new element than
      // to their current target, pushing the displaced level upwards
      for (Size prev = 0; prev < current; ++prev)
      {
        float & target_dist = dist_to_current[pi[prev]];
        if (lambda[prev] >= dist_to_current[prev])
        {
          target_dist = std::min(target_dist, lambda[prev]);
          lambda[prev] = dist_to_current[prev];
          pi[prev] = current;
        }
        else
        {
          target_dist = std::min(target_dist, dist_to_current[prev]);
        }
      }

      // an element may not merge later than the element it points to
      for (Size prev = 0; prev < current; ++prev)
      {
        if (lambda[prev] >= lambda[pi[prev]])
        {
          pi[prev] = current;
        }
      }
      setProgress(current);
    }

    // one pre-tree node per element except the last (i < pi[i] always holds)
    for (Size i = 0; i + 1 < pi.size(); ++i)
    {
      cluster_tree.push_back(BinaryTreeNode(i, pi[i], lambda[i]));
    }

    // order the pre-tree
    std::sort(cluster_tree.begin(), cluster_tree.end(), compareBinaryTreeNode);

    // renumber: after a merge the right child's slot disappears, so later
    // references to it are redirected to the left child and all higher
    // indices move down by one
    for (Size i = 0; i < cluster_tree.size(); ++i)
    {
      BinaryTreeNode & merged = cluster_tree[i];
      if (merged.right_child < merged.left_child)
      {
        std::swap(merged.left_child, merged.right_child);
      }
      for (Size k = i + 1; k < cluster_tree.size(); ++k)
      {
        BinaryTreeNode & later = cluster_tree[k];

        if (later.left_child == merged.right_child)
        {
          later.left_child = merged.left_child;
        }
        else if (later.left_child > merged.right_child)
        {
          --later.left_child;
        }

        if (later.right_child == merged.right_child)
        {
          later.right_child = merged.left_child;
        }
        else if (later.right_child > merged.right_child)
        {
          --later.right_child;
        }
      }
    }

    // replay the merges on explicit member sets to express every node by
    // the smallest element of each of its two clusters
    std::vector<std::set<Size> > clusters(element_count);
    for (Size i = 0; i < element_count; ++i)
    {
      clusters[i].insert(i);
    }
    for (Size step = 0; step < cluster_tree.size(); ++step)
    {
      BinaryTreeNode & node = cluster_tree[step];
      const Size left_slot = node.left_child;
      const Size right_slot = node.right_child;

      const Size left_min = *(clusters[left_slot].begin());
      const Size right_min = *(clusters[right_slot].begin());

      // absorb the right cluster into the left one and drop its slot
      clusters[left_slot].insert(clusters[right_slot].begin(), clusters[right_slot].end());
      clusters.erase(clusters.begin() + right_slot);

      node.left_child = left_min;
      node.right_child = right_min;
      if (node.left_child > node.right_child)
      {
        std::swap(node.left_child, node.right_child);
      }
    }

    endProgress();
  }
Example #18
0
  /**
   * Average-linkage clustering of the elements described by a distance matrix.
   *
   * The closest pair of clusters is merged repeatedly while its distance is
   * below @p threshold. After each merge the distance matrix is updated with
   * the size-weighted mean of the two merged rows and reduced by one entry.
   * If merging stops early, the tree is padded with dummy nodes (distance -1)
   * so that it always holds n - 1 nodes for n input elements.
   *
   * @param original_distance Distance matrix; modified and reduced in place.
   * @param cluster_tree      Output; cleared, then filled with the merge steps.
   * @param threshold         Merging stops once the minimum distance reaches this value.
   * @throws ClusterFunctor::InsufficientInput if the matrix has fewer than 2 elements.
   */
  void AverageLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const
  {
    // input MUST have >= 2 elements!
    if (original_distance.dimensionsize() < 2)
    {
      throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element");
    }

    // element count before any merge; dimensionsize() is re-read after every reduce()
    const Size overall_cluster_steps(original_distance.dimensionsize());
    // a complete tree over n elements consists of n - 1 merge nodes
    const Size full_tree_size = overall_cluster_steps - 1;

    // every element starts in its own cluster
    std::vector<std::set<Size> > clusters(overall_cluster_steps);
    for (Size i = 0; i < overall_cluster_steps; ++i)
    {
      clusters[i].insert(i);
    }

    cluster_tree.clear();
    cluster_tree.reserve(full_tree_size);

    // Initial minimum-distance pair
    original_distance.updateMinElement();
    std::pair<Size, Size> min = original_distance.getMinElementCoordinates();

    startProgress(0, overall_cluster_steps, "clustering data");

    while (original_distance(min.second, min.first) < threshold)
    {
      //grow the tree; clusters are named by their smallest member, smaller name on the left
      cluster_tree.push_back(BinaryTreeNode(*(clusters[min.second].begin()), *(clusters[min.first].begin()), original_distance(min.first, min.second)));
      if (cluster_tree.back().left_child > cluster_tree.back().right_child)
      {
        std::swap(cluster_tree.back().left_child, cluster_tree.back().right_child);
      }

      if (original_distance.dimensionsize() > 2)
      {
        //merge the minimum-distance pair

        //weights for the lance-williams formula: relative sizes of the two clusters
        const float merged_size = static_cast<float>(clusters[min.first].size() + clusters[min.second].size());
        const float alpha_i = static_cast<float>(clusters[min.first].size()) / merged_size;
        const float alpha_j = static_cast<float>(clusters[min.second].size()) / merged_size;

        //move the elements of cluster min.first into cluster min.second ...
        clusters[min.second].insert(clusters[min.first].begin(), clusters[min.first].end());
        // ... and drop cluster min.first
        clusters.erase(clusters.begin() + min.first);

        //update original_distance matrix
        //average linkage: new distance between clusters is the size-weighted average of the two merged clusters' distances
        //lance-williams update for d((i,j),k): (m_i/(m_i+m_j))* d(i,k) + (m_j/(m_i+m_j))* d(j,k) ; m_x is the number of elements in cluster x
        for (Size k = 0; k < min.second; ++k)
        {
          const float dik = original_distance.getValue(min.first, k);
          const float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(min.second, k, (alpha_i * dik + alpha_j * djk));
        }
        for (Size k = min.second + 1; k < original_distance.dimensionsize(); ++k)
        {
          const float dik = original_distance.getValue(min.first, k);
          const float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(k, min.second, (alpha_i * dik + alpha_j * djk));
        }

        //remove the entry of the absorbed cluster
        original_distance.reduce(min.first);

        //update minimum-distance pair
        original_distance.updateMinElement();

        //get min-pair from triangular matrix
        min = original_distance.getMinElementCoordinates();
      }
      else
      {
        //last two clusters: the node was recorded above, no matrix work left
        break;
      }
      setProgress(overall_cluster_steps - original_distance.dimensionsize());

      //repeat until only two clusters remain or the threshold is exceeded
    }

    //fill tree with dummy nodes up to the full tree size.
    //The bound is the expected node count rather than cluster_tree.capacity():
    //capacity may exceed the reserved amount (e.g. when the caller re-uses a
    //vector), which would append a spurious node after a complete clustering.
    Size sad(*clusters.front().begin());
    for (Size i = 1; (i < clusters.size()) && (cluster_tree.size() < full_tree_size); ++i)
    {
      cluster_tree.push_back(BinaryTreeNode(sad, *clusters[i].begin(), -1.0));
    }

    endProgress();
  }
Example #19
0
/**
 * All-pairs shortest paths on a directed graph that may contain negative
 * edge costs.
 *
 * Steps performed:
 *  1. add a helper vertex with a zero-cost edge to every other vertex;
 *  2. run BellmanFord from the helper vertex to obtain a potential h(v);
 *  3. reweight every edge as w'(u,v) = w(u,v) + h(u) - h(v);
 *  4. run Dijkstra from every vertex and store d'(u,v) + h(v) - h(u);
 *  5. restore the original edge costs.
 *
 * NOTE(review): the helper vertex and its edges are NOT removed from g again;
 * no removal call is visible here. The returned matrix is created with
 * g->vsize() taken after the helper was added, so it presumably contains a
 * row/column for the helper vertex too - confirm against callers.
 *
 * @param g Graph to process; must not be null. Edge costs are modified
 *          temporarily and restored before returning.
 * @return A heap-allocated matrix owned by the caller. When BellmanFord
 *         returns false (negative-weight cycle) a DistanceMatrix(0) is returned.
 */
DistanceMatrix *Johnson(dgraph *g)
{
     assert(g);

     // Helper vertex used as the single source for BellmanFord.
     unsigned int v_id = g->add_vertex();
     unsigned int it = 0;

     for (it = 0; it < g->vsize(); it++)
     {
          if (it == v_id)
               continue;

          // Adding edge from newly created vertex to all
          // existing vertices with edge cost 0.
          g->add_edge(v_id, it, 0);
     }

     if (!BellmanFord(g, g->get_vertex(v_id)))
     {
          // Return an invalid matrix if Bellman Ford algorithm returns false.
          // This means there exists a negative weight cycle.
          return (new DistanceMatrix(0));
     }

     // If Bellman Ford returns true then d-values of all the vertices contain
     // shortest distances to all vertices from newly created vertex (v_id).
     // They are copied because Dijkstra is run on the same graph below.
     int *h = (int*) calloc(g->vsize(), sizeof(int));
     assert(h);

     for (it = 0; it < g->vsize(); it++)
     {
          assert(g->get_vertex(it));
          h[it] = g->get_vertex(it)->d;
     }


     // Reassign the weights of the edges so that they are non negative.
     // w'(u,v) = w(u,v) + h(u) - h(v)
     for (it = 0; it < g->vsize(); it++)
     {
          vertex *u = g->get_vertex(it);
          assert(u);

          unsigned int is = 0;
          for (is = 0; is < u->edges.size(); is++)
          {
               assert(u->edges[is]);
               unsigned int v_index = u->edges[is]->ends[DESTINATION];
               vertex *v = g->get_vertex(v_index);
               assert(v);
               int new_cost = u->edges[is]->cost + h[it] - h[v_index];
               u->edges[is]->cost = new_cost;
          }
     }

     DistanceMatrix *D = new DistanceMatrix(g->vsize());

     // One Dijkstra run per source vertex on the reweighted graph.
     for (it = 0; it < g->vsize(); it++)
     {
          vertex *u = g->get_vertex(it);
          assert(u);

          Dijkstra(g, u);

          unsigned int is = 0;
          for (is = 0; is < g->vsize(); is++)
          {
               vertex *v = g->get_vertex(is);
               assert(v);

               // Undo the reweighting: d(u,v) = d'(u,v) + h(v) - h(u).
               // NOTE(review): if v->d holds a large "unreachable" sentinel
               // this sum may overflow - confirm how Dijkstra marks
               // unreachable vertices.
               D->SetDistance(it, is, v->d + h[is] - h[it]);
          }
     }

     // Revert back the original edge weights.
     for (it = 0; it < g->vsize(); it++)
     {
          vertex *u = g->get_vertex(it);
          assert(u);

          unsigned int is = 0;
          for (is = 0; is < u->edges.size(); is++)
          {
               assert(u->edges[is]);
               unsigned int v_index = u->edges[is]->ends[DESTINATION];
               vertex *v = g->get_vertex(v_index);
               assert(v);
               int old_cost = u->edges[is]->cost + h[v_index] - h[it];
               u->edges[is]->cost = old_cost;
          }
     }

     // The potentials are no longer needed; release them so that repeated
     // calls do not leak.
     free(h);

     // TODO: remove the extra vertex (v_id) from g once a removal API is available.

     return D;
}
Example #20
0
  /**
   * Complete-linkage clustering of the elements described by a distance matrix.
   *
   * The closest pair of clusters is merged repeatedly while its distance is
   * below @p threshold. After each merge the distance from the merged cluster
   * to any other cluster becomes the larger of the two previous distances,
   * and the matrix is reduced by one entry. If merging stops early, the tree
   * is padded with dummy nodes (distance -1) so that it always holds n - 1
   * nodes for n input elements.
   *
   * @param original_distance Distance matrix; modified and reduced in place.
   * @param cluster_tree      Output; cleared, then filled with the merge steps.
   * @param threshold         Merging stops once the minimum distance reaches this value.
   * @throws ClusterFunctor::InsufficientInput if the matrix has fewer than 2 elements.
   */
  void CompleteLinkage::operator()(DistanceMatrix<float> & original_distance, std::vector<BinaryTreeNode> & cluster_tree, const float threshold /*=1*/) const
  {
    // attention: clustering process is done by clustering the indices
    // pointing to elements in inputvector and distances in inputmatrix

    // input MUST have >= 2 elements!
    if (original_distance.dimensionsize() < 2)
    {
      throw ClusterFunctor::InsufficientInput(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Distance matrix to start from only contains one element");
    }

    // element count before any merge; dimensionsize() is re-read after every reduce()
    const Size overall_cluster_steps(original_distance.dimensionsize());
    // a complete tree over n elements consists of n - 1 merge nodes
    const Size full_tree_size = overall_cluster_steps - 1;

    // every element starts in its own cluster
    std::vector<std::set<Size> > clusters(overall_cluster_steps);
    for (Size i = 0; i < overall_cluster_steps; ++i)
    {
      clusters[i].insert(i);
    }

    cluster_tree.clear();
    cluster_tree.reserve(full_tree_size);

    // Initial minimum-distance pair
    original_distance.updateMinElement();
    std::pair<Size, Size> min = original_distance.getMinElementCoordinates();

    startProgress(0, overall_cluster_steps, "clustering data");

    while (original_distance(min.first, min.second) < threshold)
    {
      //grow the tree; clusters are named by their smallest member, smaller name on the left
      cluster_tree.push_back(BinaryTreeNode(*(clusters[min.second].begin()), *(clusters[min.first].begin()), original_distance(min.first, min.second)));
      if (cluster_tree.back().left_child > cluster_tree.back().right_child)
      {
        std::swap(cluster_tree.back().left_child, cluster_tree.back().right_child);
      }

      if (original_distance.dimensionsize() > 2)
      {
        //merge the minimum-distance pair

        //move the elements of cluster min.first into cluster min.second ...
        clusters[min.second].insert(clusters[min.first].begin(), clusters[min.first].end());
        // ... and drop cluster min.first
        clusters.erase(clusters.begin() + min.first);

        //update original_distance matrix
        //complete linkage: new distance between clusters is the maximum distance between elements of each cluster
        //lance-williams update for d((i,j),k): 0.5* d(i,k) + 0.5* d(j,k) + 0.5* |d(i,k)-d(j,k)|  (= max(d(i,k), d(j,k)))
        for (Size k = 0; k < min.second; ++k)
        {
          const float dik = original_distance.getValue(min.first, k);
          const float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(min.second, k, (0.5f * dik + 0.5f * djk + 0.5f * std::fabs(dik - djk)));
        }
        for (Size k = min.second + 1; k < original_distance.dimensionsize(); ++k)
        {
          const float dik = original_distance.getValue(min.first, k);
          const float djk = original_distance.getValue(min.second, k);
          original_distance.setValueQuick(k, min.second, (0.5f * dik + 0.5f * djk + 0.5f * std::fabs(dik - djk)));
        }

        //remove the entry of the absorbed cluster
        original_distance.reduce(min.first);

        //update minimum-distance pair
        original_distance.updateMinElement();

        //get new min-pair
        min = original_distance.getMinElementCoordinates();
      }
      else
      {
        //last two clusters: the node was recorded above, no matrix work left
        break;
      }
      setProgress(overall_cluster_steps - original_distance.dimensionsize());

      //repeat until only two clusters remain or threshold exceeded, last step skips matrix operations
    }

    //fill tree with dummy nodes up to the full tree size.
    //The bound is the expected node count rather than cluster_tree.capacity():
    //capacity may exceed the reserved amount (e.g. when the caller re-uses a
    //vector), which would append a spurious node after a complete clustering.
    Size sad(*clusters.front().begin());
    for (Size i = 1; i < clusters.size() && (cluster_tree.size() < full_tree_size); ++i)
    {
      cluster_tree.push_back(BinaryTreeNode(sad, *clusters[i].begin(), -1.0));
    }

    endProgress();
  }