예제 #1
0
//**********************************************************************************************************************
// Builds the consensus taxonomy string for one bin of sequence names.
//
// Each name that can be resolved to a taxonomy (directly through taxMap, or through
// nameMap -> taxMap when a name file is in use) is added to a temporary PhyloTree.
// The tree is then walked from the root, following at every level the child holding
// the most sequences, for as long as that child's share of the bin reaches cutoff.
//
// names  - sequence names belonging to the bin
// size   - output: number of sequences added to the tree (when a count file is in
//          use, each name contributes ct->getNumSeqs(name) sequences)
// conTax - output: consensus taxonomy, padded with "unclassified;" up to the tree's
//          max level, or "no_consensus;" if nothing at all was produced
//
// Returns the names that were found and included in the consensus. If the user
// interrupts (m->control_pressed) the names gathered so far are returned and conTax
// is left empty.
vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax) {
	try{
		conTax = "";
		vector<string> allNames;
		map<string, string>::iterator it;
		map<string, string>::iterator it2;

		//create a tree containing sequences from this bin
		PhyloTree* phylo = new PhyloTree();
		
		size = 0;
		const int numNames = static_cast<int>(names.size());
		for (int i = 0; i < numNames; i++) {
	
			//if namesfile include the names
			if (namefile != "") {
	
				//is this sequence in the name file - namemap maps seqName -> repSeqName
				it2 = nameMap.find(names[i]);
				
				if (it2 == nameMap.end()) { //this name is not in name file, skip it
					m->mothurOut(names[i] + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
				}else{
					
					//is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
					it = taxMap.find(it2->second);
			
					if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
					
						if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
						else {  m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
					}else{
				
						//add seq to tree
						phylo->addSeqToTree(names[i], it->second);
						size++;
						allNames.push_back(names[i]);
					}
				}
				
			}else{
				//no name file, so the name itself is the key into the taxonomy file
				it = taxMap.find(names[i]);
		
				if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
					m->mothurOut(names[i] + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
				}else{
					if (countfile != "") {
						//with a count file one name stands for numDups sequences, so add it that many times
						int numDups = ct->getNumSeqs(names[i]); 
						for (int j = 0; j < numDups; j++) {  phylo->addSeqToTree(names[i], it->second);  }
						size += numDups;
					}else{
						//add seq to tree
						phylo->addSeqToTree(names[i], it->second);
						size++;  
					}
					allNames.push_back(names[i]);
				}
			}

			
			if (m->control_pressed) { delete phylo; return allNames; }
			
		}
		
		//build tree
		phylo->assignHeirarchyIDs(0);
		
		TaxNode currentNode = phylo->get(0);
		int myLevel = 0; 	
		//at each level
		//size > 0 guard: with no sequences in the bin the percentage below would be x/0 (NaN or inf),
		//and converting that to int is undefined behaviour - there is no consensus to find, so do not walk at all
		while ((size > 0) && (currentNode.children.size() != 0)) { //you still have more to explore
		
			TaxNode bestChild;
			int bestChildSize = 0;
			
			//go through children
			for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
				
				TaxNode temp = phylo->get(itChild->second);
				const int tempSize = static_cast<int>(temp.accessions.size());
				
				//select child with largest accesions - most seqs assigned to it
				if (tempSize > bestChildSize) {
					bestChild = temp; //reuse the node we already fetched instead of looking it up again
					bestChildSize = tempSize;
				}
				
			}
            
			//phylotree adds an extra unknown so we want to remove that
			if (bestChild.name == "unknown") { bestChildSize--; }
				
			//is this taxonomy above cutoff
			int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
			
			if (consensusConfidence >= cutoff) { //if yes, add it
				if (probs) {
					conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
				}else{
					conTax += bestChild.name + ";";
				}
				myLevel++;
			}else{ //if no, quit
				break;
			}
			
			//move down a level
			currentNode = bestChild;
		}
		
		//pad the remaining levels; '<' rather than '!=' so the loop still ends if myLevel is ever past the max level
		while (myLevel < phylo->getMaxLevel()) {
			conTax += "unclassified;";
			myLevel++;
		}
		if (conTax == "") {  conTax = "no_consensus;";  }
		
		delete phylo;	
		
		return allNames;
			
	}
	catch(exception& e) {
		m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy");
		exit(1);
	}
}
예제 #2
0
int SplitMatrix::splitClassify() {
    try {
        cutoff = int(cutoff);

        map<string, int> seqGroup;
        map<string, int>::iterator it;
        map<string, int>::iterator it2;

        int numGroups = 0;

        //build tree from users taxonomy file
        PhyloTree* phylo = new PhyloTree();

        map<string, string> temp;
        m->readTax(taxFile, temp, true);

        for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
            phylo->addSeqToTree(itTemp->first, itTemp->second);
            temp.erase(itTemp++);
        }

        phylo->assignHeirarchyIDs(0);

        //make sure the cutoff is not greater than maxlevel
        if (cutoff > phylo->getMaxLevel()) {
            m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel()));
            m->mothurOutEndLine();
            cutoff = phylo->getMaxLevel();
        }

        //for each node in tree
        for (int i = 0; i < phylo->getNumNodes(); i++) {

            //is this node within the cutoff
            TaxNode taxon = phylo->get(i);

            if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences
                if (taxon.accessions.size() > 1) { //if this taxon just has one seq its a singleton
                    for (int j = 0; j < taxon.accessions.size(); j++) {
                        seqGroup[taxon.accessions[j]] = numGroups;
                    }
                    numGroups++;
                }
            }
        }

        delete phylo;

        if (method == "classify") {
            splitDistanceFileByTax(seqGroup, numGroups);
        } else {
            createDistanceFilesFromTax(seqGroup, numGroups);
        }

        return 0;

    }
    catch(exception& e) {
        m->errorOut(e, "SplitMatrix", "splitClassify");
        exit(1);
    }
}