Exemplo n.º 1
0
// Recursively walks the taxonomy from node `index` and makes sure every visited node
// in `copy` owns a child named "unclassified".
//   index - position of the node to process; used to index both `tree` and `copy`
//   copy  - node vector that receives the new "unclassified" nodes (appended at the end)
// The traversal is driven by `tree`, not by `copy`: both the level test and the list of
// children to descend into are read from tree[index], so nodes appended to `copy` here
// are never recursed into. Nodes are addressed by index only, so growing `copy` during
// the recursion does not disturb the walk.
// Any std::exception is reported through m->errorOut and the process exits with status 1.
void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) {
	try {
		string placeholder = "unclassified";

		//only add the placeholder child when this node does not have one yet
		if (copy[index].children.count(placeholder) == 0) {
			copy.push_back(TaxNode(placeholder));

			int added = copy.size()-1;	//the new node sits at the end of the vector
			copy[index].children[placeholder] = added;
			copy[added].parent = index;
			copy[added].level = copy[index].level + 1;
		}

		//keep descending until the deepest recorded level is reached
		if (tree[index].level < maxLevel) {
			map<string,int>::iterator child = tree[index].children.begin();
			while (child != tree[index].children.end()) {
				fillOutTree(child->second, copy);
				++child;
			}
		}
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "fillOutTree");
		exit(1);
	}
}
Exemplo n.º 2
0
// Adds one sequence to the taxonomy tree.
//   seqName     - name recorded in the accessions of every node on the sequence's path
//   seqTaxonomy - taxonomy string; passed through m->removeConfidences and then consumed
//                 one taxon at a time by getNextTaxon
// Starting at the root (node 0), each taxon either follows an existing child or creates a
// new node. name2Taxonomy[seqName] ends up pointing at the last node reached, and that
// node is inserted into uniqueTaxonomies once the taxonomy string has been used up.
// maxLevel is raised when this sequence went deeper than any sequence before it.
// Returns 0 in every case. When m->control_pressed is set the walk stops immediately and
// maxLevel is left untouched.
// Any std::exception is reported through m->errorOut and the process exits with status 1.
int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
	try {
		numSeqs++;

		int depth = 0;			//number of loop iterations, i.e. taxon levels seen
		int currentNode = 0;	//start at the root

		tree[0].accessions.push_back(seqName);
		m->removeConfidences(seqTaxonomy);

		string taxon;

		//presumably getNextTaxon strips the returned taxon from seqTaxonomy (the loop only
		//finishes normally once seqTaxonomy is empty) - confirm against getNextTaxon
		while (!seqTaxonomy.empty()) {

			depth++;

			if (m->control_pressed) { return 0; }

			//note kept from the original author: somehow the parent is getting one too many
			//accnos; use print to reassign the taxa id
			taxon = getNextTaxon(seqTaxonomy, seqName);

			if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); }

			//NOTE(review): depth was already incremented for this iteration, so a malformed
			//taxonomy still counts toward maxLevel - confirm this is intended
			if (taxon.empty()) {
				m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;");
				m->mothurOutEndLine();
				if (currentNode != 0) { uniqueTaxonomies.insert(currentNode); }
				break;
			}

			map<string, int>::iterator existing = tree[currentNode].children.find(taxon);

			if (existing == tree[currentNode].children.end()) {
				//no such child yet: append a node and link it under the current one
				tree.push_back(TaxNode(taxon));
				numNodes++;

				int created = numNodes-1;
				tree[currentNode].children[taxon] = created;
				tree[created].parent = currentNode;
				currentNode = created;
			}
			else {
				//the child already exists, step into it
				currentNode = existing->second;
			}

			//bookkeeping shared by both cases
			tree[currentNode].accessions.push_back(seqName);
			name2Taxonomy[seqName] = currentNode;

			//taxonomy fully consumed: this node is where the sequence ends
			if (seqTaxonomy.empty()) { uniqueTaxonomies.insert(currentNode); }
		}

		//save maxLevel for binning the unclassified seqs
		if (depth > maxLevel) { maxLevel = depth; }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "addSeqToTree");
		exit(1);
	}
}
Exemplo n.º 3
0
// Default constructor: creates a tree that holds only the "Root" node (heirarchyID "0")
// and then registers the placeholder sequence "unknown" with taxonomy "unknown;".
// Any std::exception is reported through m->errorOut and the process exits with status 1.
PhyloTree::PhyloTree(){
	try {
		//the output handler comes first because the catch block below relies on it
		m = MothurOut::getInstance();

		//flags and counters
		calcTotals = true;
		maxLevel = 0;
		numSeqs = 0;
		numNodes = 1;	//accounts for the root node pushed below

		//root node
		tree.push_back(TaxNode("Root"));
		tree[0].heirarchyID = "0";

		//everything above must be in place before the first sequence is added
		addSeqToTree("unknown", "unknown;");
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "PhyloTree");
		exit(1);
	}
}
Exemplo n.º 4
0
// Builds the tree from a taxonomy file.
//   tfile - path handed to util.readTax and later to setUp
// Steps: create the "Root" node (heirarchyID "0", level 0), add every name/taxonomy pair
// read from tfile, add a placeholder sequence "unknown" whose taxonomy is padded to the
// deepest level seen, assign hierarchy ids starting at the root, and finally call setUp.
// Any std::exception is reported through m->errorOut and the process exits with status 1.
PhyloTree::PhyloTree(string tfile){
	try {
		//the output handler comes first because the catch block below relies on it
		m = MothurOut::getInstance();
		current = CurrentFile::getInstance();

		//flags and counters
		calcTotals = true;
		maxLevel = 0;
		numSeqs = 0;
		numNodes = 1;	//accounts for the root node pushed below

		//root node
		tree.push_back(TaxNode("Root"));
		tree[0].level = 0;
		tree[0].heirarchyID = "0";

		map<string, string> seqTaxonomies;
		util.readTax(tfile, seqTaxonomies, true);

		//drain the map front to back, releasing each entry as soon as it is in the tree
		while (!seqTaxonomies.empty()) {
			map<string, string>::iterator next = seqTaxonomies.begin();
			addSeqToTree(next->first, next->second);
			seqTaxonomies.erase(next);
		}

		//pad the placeholder taxonomy until it is maxLevel taxa deep
		//NOTE(review): "unclassfied" looks like a misspelling of "unclassified"; it is a
		//runtime string, so it is kept exactly as it was - confirm before changing
		string unknownTax = "unknown;";
		for (int depth = 1; depth < maxLevel; ++depth) {
			unknownTax += "unknown_unclassfied;";
		}
		addSeqToTree("unknown", unknownTax);

		assignHeirarchyIDs(0);

		//create file for summary if needed
		setUp(tfile);
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "PhyloTree");
		exit(1);
	}
}
Exemplo n.º 5
0
// Writes a version of the tree in which every leaf is extended with "unclassified"
// nodes until it reaches maxLevel.
//   file - name of the output file, opened through m->openOutputFile
// Works on a copy of `tree`, so the member tree itself is not modified. The copy is first
// passed through fillOutTree, then every childless node is recorded in leafNodes and
// extended level by level. The result is written with print(out, copy) unless
// m->control_pressed was set.
// leafNodes is not declared in this function; entries are added here and never removed.
// The stream is closed explicitly only on the cancel path.
// Any std::exception is reported through m->errorOut and the process exits with status 1.
void PhyloTree::binUnclassified(string file){
	try {
		ofstream out;
		m->openOutputFile(file, out);

		//work on a copy that already has its "unclassified" children filled in
		vector<TaxNode> copy = tree;
		fillOutTree(0, copy);

		//remember every node without children, these may need extension
		int total = copy.size();
		for (int node = 0; node < total; node++) {
			if (copy[node].children.empty()) { leafNodes[node] = node; }
		}

		if (m->debug) { m->mothurOut("maxLevel = " + toString(maxLevel) +'\n'); }

		int copyNodes = copy.size();	//running node count of the copy
		string taxon = "unclassified";

		//a leaf that sits above maxLevel gets a chain of 'unclassified' nodes below it
		for (map<int, int>::iterator itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) {

			if (m->control_pressed) {  out.close(); break;  }

			int currentNode = itLeaf->second;
			int level = copy[currentNode].level;

			if (m->debug) { m->mothurOut(copy[currentNode].name +'\n'); }

			while (level < maxLevel) {

				level++;
				if (m->debug) { m->mothurOut("level = " + toString(level) +'\n'); }

				//does the current node already have a child named 'unclassified'?
				map<string, int>::iterator childPointer = copy[currentNode].children.find(taxon);

				if (childPointer == copy[currentNode].children.end()) {
					//no: append one and hook it up one level below the current node
					copy.push_back(TaxNode(taxon));
					copyNodes++;

					int created = copyNodes-1;
					copy[currentNode].children[taxon] = created;
					copy[created].parent = currentNode;
					copy[created].level = copy[currentNode].level + 1;

					currentNode = created;
				}
				else {
					//yes: step into it
					currentNode = childPointer->second;
				}
			}
		}

		//print copy tree
		if (!m->control_pressed) { print(out, copy); }

	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "binUnclassified");
		exit(1);
	}
}