void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) { try { map<string,int>::iterator it; it = copy[index].children.find("unclassified"); if (it == copy[index].children.end()) { //no unclassified at this level string taxon = "unclassified"; copy.push_back(TaxNode(taxon)); copy[index].children[taxon] = copy.size()-1; copy[copy.size()-1].parent = index; copy[copy.size()-1].level = copy[index].level + 1; } if (tree[index].level < maxLevel) { for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children fillOutTree(it->second, copy); } } } catch(exception& e) { m->errorOut(e, "PhyloTree", "fillOutTree"); exit(1); } }
/**
 * Insert one sequence into the taxonomy tree, creating nodes as needed.
 *
 * @param seqName      name recorded in the `accessions` list of the root and of
 *                     every node visited along the taxonomy path.
 * @param seqTaxonomy  taxonomy string; it is passed through
 *                     m->removeConfidences and then consumed taxon by taxon
 *                     via getNextTaxon until it is empty.
 * @return always 0 (including the early return when m->control_pressed is set).
 *
 * Side effects: increments numSeqs, may append nodes to `tree` (bumping
 * numNodes), updates name2Taxonomy[seqName] to the deepest node reached,
 * records that node in uniqueTaxonomies once the taxonomy is exhausted, and
 * raises maxLevel when this sequence's path is deeper than any seen before.
 * On any std::exception the error is reported through m->errorOut and the
 * process terminates with exit(1).
 */
int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
	try {
		numSeqs++;

		int node = 0;   // index of the node we are currently standing on (0 = root)
		int depth = 0;  // number of taxa consumed so far

		tree[0].accessions.push_back(seqName);
		m->removeConfidences(seqTaxonomy);

		while (!seqTaxonomy.empty()) {
			depth++;

			// An interrupted run returns before maxLevel is updated.
			if (m->control_pressed) { return 0; }

			// Original author's note: "somehow the parent is getting one too many
			// accnos; use print to reassign the taxa id".
			string taxon = getNextTaxon(seqTaxonomy, seqName);

			if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); }

			if (taxon.empty()) {
				m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;");
				m->mothurOutEndLine();
				// Keep whatever was resolved so far, unless we never left the root.
				if (node != 0) { uniqueTaxonomies.insert(node); }
				break;
			}

			map<string, int>::iterator hit = tree[node].children.find(taxon);
			if (hit == tree[node].children.end()) {
				// Unknown taxon under this parent: append a node and link it.
				tree.push_back(TaxNode(taxon));
				numNodes++;
				const int created = numNodes-1;
				tree[node].children[taxon] = created;
				tree[created].parent = node;
				node = created;
			} else {
				// Taxon already present: just descend into it.
				node = hit->second;
			}

			tree[node].accessions.push_back(seqName);
			name2Taxonomy[seqName] = node;

			// Taxonomy fully consumed: this node is the sequence's finest taxon.
			if (seqTaxonomy.empty()) { uniqueTaxonomies.insert(node); }
		}

		// Remember the deepest path seen; used when binning unclassified seqs.
		if (depth > maxLevel) { maxLevel = depth; }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "addSeqToTree");
		exit(1);
	}
}
/**
 * Build an empty taxonomy tree holding only the root node plus a single
 * "unknown" sequence classified as "unknown;".
 *
 * Initializes the node/sequence counters, creates the "Root" node with
 * heirarchyID "0" and level 0, resets maxLevel, enables calcTotals, and then
 * registers the "unknown" entry through addSeqToTree.
 * On any std::exception the error is reported through m->errorOut and the
 * process terminates with exit(1).
 */
PhyloTree::PhyloTree(){
	try {
		m = MothurOut::getInstance();

		numNodes = 1;
		numSeqs = 0;

		tree.push_back(TaxNode("Root"));
		tree[0].heirarchyID = "0";
		// Set the root level explicitly, as the file-based constructor does:
		// fillOutTree and binUnclassified compute child levels from it.
		tree[0].level = 0;

		maxLevel = 0;
		calcTotals = true;

		addSeqToTree("unknown", "unknown;");
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "PhyloTree");
		exit(1);
	}
}
/**
 * Build the taxonomy tree from a taxonomy file.
 *
 * @param tfile  path handed to util.readTax to load name -> taxonomy pairs, and
 *               afterwards to setUp.
 *
 * Steps: initialize counters and the "Root" node (heirarchyID "0", level 0),
 * add every sequence read from the file, add an "unknown" sequence whose
 * taxonomy is padded out to the deepest level observed, assign hierarchy IDs
 * starting at the root, and finally call setUp(tfile).
 * On any std::exception the error is reported through m->errorOut and the
 * process terminates with exit(1).
 */
PhyloTree::PhyloTree(string tfile){
	try {
		m = MothurOut::getInstance();
		current = CurrentFile::getInstance();

		numNodes = 1;
		numSeqs = 0;
		maxLevel = 0;
		calcTotals = true;

		tree.push_back(TaxNode("Root"));
		tree[0].heirarchyID = "0";
		tree[0].level = 0;

		map<string, string> seqToTax;
		util.readTax(tfile, seqToTax, true);

		// Drain the map front to back (ascending key order), releasing each entry
		// as soon as it has been added to the tree.
		while (!seqToTax.empty()) {
			map<string, string>::iterator front = seqToTax.begin();
			addSeqToTree(front->first, front->second);
			seqToTax.erase(front);
		}

		// Pad the "unknown" taxonomy with one extra taxon per level until it is
		// as deep as the deepest sequence seen. The taxon spelling is runtime
		// data and is reproduced exactly as the original code emits it.
		string unknownTax = "unknown;";
		for (int depth = 1; depth < maxLevel; ++depth) {
			unknownTax.append("unknown_unclassfied;");
		}
		addSeqToTree("unknown", unknownTax);

		assignHeirarchyIDs(0);

		// create file for summary if needed
		setUp(tfile);
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "PhyloTree");
		exit(1);
	}
}
/**
 * Write a version of the tree in which every branch is extended with
 * "unclassified" nodes down to maxLevel.
 *
 * @param file  output path, opened through m->openOutputFile.
 *
 * Works on a copy of the member `tree`: fillOutTree adds the first layer of
 * "unclassified" placeholders, every childless node of the copy is recorded in
 * the member `leafNodes`, and each recorded leaf is then extended with a chain
 * of "unclassified" nodes until it reaches maxLevel. The resulting copy is
 * handed to print(out, ...) unless m->control_pressed was set.
 * On any std::exception the error is reported through m->errorOut and the
 * process terminates with exit(1).
 */
void PhyloTree::binUnclassified(string file){
	try {
		ofstream out;
		m->openOutputFile(file, out);

		string unclassified = "unclassified";

		// fill out tree
		vector<TaxNode> filled = tree;
		fillOutTree(0, filled);

		// Collect the leaf nodes that may need extension. Entries are added to
		// the member map; it is not cleared here.
		for (int idx = 0; idx < filled.size(); idx++) {
			if (filled[idx].children.empty()) { leafNodes[idx] = idx; }
		}

		if (m->debug) { m->mothurOut("maxLevel = " + toString(maxLevel) +'\n'); }

		int totalNodes = filled.size();

		// For each leaf whose level is shallower than maxLevel, descend through
		// (or create) "unclassified" children until maxLevel is reached.
		for (map<int, int>::iterator leaf = leafNodes.begin(); leaf != leafNodes.end(); ++leaf) {

			if (m->control_pressed) { out.close(); break; }

			int node = leaf->second;
			const int startLevel = filled[node].level;

			if (m->debug) { m->mothurOut(filled[node].name +'\n'); }

			for (int depth = startLevel + 1; depth <= maxLevel; depth++) {

				if (m->debug) { m->mothurOut("level = " + toString(depth) +'\n'); }

				// Does the current node already have a child named "unclassified"?
				map<string, int>::iterator found = filled[node].children.find(unclassified);

				if (found == filled[node].children.end()) {
					// No: append one, link it to its parent, and step into it.
					filled.push_back(TaxNode(unclassified));
					totalNodes++;
					const int fresh = totalNodes-1;
					filled[node].children[unclassified] = fresh;
					filled[fresh].parent = node;
					filled[fresh].level = filled[node].level + 1;
					node = fresh;
				} else {
					// Yes: step into the existing "unclassified" node.
					node = found->second;
				}
			}
		}

		// print copy tree
		if (!m->control_pressed) { print(out, filled); }
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "binUnclassified");
		exit(1);
	}
}