//********************************************************************************************************************** vector<string> ClassifyOtuCommand::findConsensusTaxonomy(vector<string> names, int& size, string& conTax) { try{ conTax = ""; vector<string> allNames; map<string, string>::iterator it; map<string, string>::iterator it2; //create a tree containing sequences from this bin PhyloTree* phylo = new PhyloTree(); size = 0; for (int i = 0; i < names.size(); i++) { //if namesfile include the names if (namefile != "") { //is this sequence in the name file - namemap maps seqName -> repSeqName it2 = nameMap.find(names[i]); if (it2 == nameMap.end()) { //this name is not in name file, skip it m->mothurOut(names[i] + " is not in your name file. I will not include it in the consensus."); m->mothurOutEndLine(); }else{ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique it = taxMap.find(it2->second); if (it == taxMap.end()) { //this name is not in taxonomy file, skip it if (names[i] != it2->second) { m->mothurOut(names[i] + " is represented by " + it2->second + " and is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); } else { m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); } }else{ //add seq to tree phylo->addSeqToTree(names[i], it->second); size++; allNames.push_back(names[i]); } } }else{ //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique it = taxMap.find(names[i]); if (it == taxMap.end()) { //this name is not in taxonomy file, skip it m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine(); }else{ if (countfile != "") { int numDups = ct->getNumSeqs(names[i]); for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); } size += numDups; }else{ //add seq to tree phylo->addSeqToTree(names[i], it->second); size++; } allNames.push_back(names[i]); } } if (m->control_pressed) { delete phylo; return allNames; } } //build tree phylo->assignHeirarchyIDs(0); TaxNode currentNode = phylo->get(0); int myLevel = 0; //at each level while (currentNode.children.size() != 0) { //you still have more to explore TaxNode bestChild; int bestChildSize = 0; //go through children for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) { TaxNode temp = phylo->get(itChild->second); //select child with largest accesions - most seqs assigned to it if (temp.accessions.size() > bestChildSize) { bestChild = phylo->get(itChild->second); bestChildSize = temp.accessions.size(); } } //phylotree adds an extra unknown so we want to remove that if (bestChild.name == "unknown") { bestChildSize--; } //is this taxonomy above cutoff int consensusConfidence = ceil((bestChildSize / (float) size) * 100); if (consensusConfidence >= cutoff) { //if yes, add it if (probs) { conTax += bestChild.name + "(" + toString(consensusConfidence) + ");"; }else{ conTax += bestChild.name + ";"; } myLevel++; }else{ //if no, quit break; } //move down a level currentNode = bestChild; } if (myLevel != phylo->getMaxLevel()) { while (myLevel != phylo->getMaxLevel()) { conTax += "unclassified;"; myLevel++; } } if (conTax == "") { conTax = "no_consensus;"; } delete phylo; return allNames; } catch(exception& e) { m->errorOut(e, "ClassifyOtuCommand", "findConsensusTaxonomy"); exit(1); } }
int SplitMatrix::splitClassify() { try { cutoff = int(cutoff); map<string, int> seqGroup; map<string, int>::iterator it; map<string, int>::iterator it2; int numGroups = 0; //build tree from users taxonomy file PhyloTree* phylo = new PhyloTree(); map<string, string> temp; m->readTax(taxFile, temp, true); for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) { phylo->addSeqToTree(itTemp->first, itTemp->second); temp.erase(itTemp++); } phylo->assignHeirarchyIDs(0); //make sure the cutoff is not greater than maxlevel if (cutoff > phylo->getMaxLevel()) { m->mothurOut("splitcutoff is greater than the longest taxonomy, using " + toString(phylo->getMaxLevel())); m->mothurOutEndLine(); cutoff = phylo->getMaxLevel(); } //for each node in tree for (int i = 0; i < phylo->getNumNodes(); i++) { //is this node within the cutoff TaxNode taxon = phylo->get(i); if (taxon.level == cutoff) {//if yes, then create group containing this nodes sequences if (taxon.accessions.size() > 1) { //if this taxon just has one seq its a singleton for (int j = 0; j < taxon.accessions.size(); j++) { seqGroup[taxon.accessions[j]] = numGroups; } numGroups++; } } } delete phylo; if (method == "classify") { splitDistanceFileByTax(seqGroup, numGroups); } else { createDistanceFilesFromTax(seqGroup, numGroups); } return 0; } catch(exception& e) { m->errorOut(e, "SplitMatrix", "splitClassify"); exit(1); } }