// Inserts one sequence into the taxonomy tree.
//
// seqName     - accession recorded on the root and on every node along the path.
// seqTaxonomy - taxonomy string; confidence scores are stripped via
//               m->removeConfidences() before it is walked.
//
// Each taxon returned by getNextTaxon() is looked up among the children of the
// current node; a missing child is appended to `tree`. getNextTaxon() presumably
// consumes the leading taxon from seqTaxonomy (the loop relies on the string
// eventually becoming empty) -- confirm against its definition.
//
// Always returns 0. When m->control_pressed is set the function returns
// immediately, without updating maxLevel.
int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
	try {
		numSeqs++;

		int node = 0;    // index into `tree` of the node reached so far (0 = root)
		int depth = 0;   // number of taxa consumed from seqTaxonomy
		string taxon;

		// every sequence is recorded on the root node
		tree[0].accessions.push_back(seqName);

		m->removeConfidences(seqTaxonomy);

		while (!seqTaxonomy.empty()) {
			depth++;

			if (m->control_pressed) { return 0; }

			taxon = getNextTaxon(seqTaxonomy, seqName);

			if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); }

			// an empty taxon means the taxonomy string is malformed: stop here,
			// remembering the deepest node reached (unless that is the root)
			if (taxon.empty()) {
				m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;");
				m->mothurOutEndLine();
				if (node != 0) { uniqueTaxonomies.insert(node); }
				break;
			}

			map<string, int>::iterator hit = tree[node].children.find(taxon);

			if (hit == tree[node].children.end()) {
				// unseen taxon under this parent: append a node and link it in
				tree.push_back(TaxNode(taxon));
				numNodes++;

				const int created = numNodes - 1;
				tree[node].children[taxon] = created;
				tree[created].parent = node;
				node = created;
			}
			else {
				// taxon already present: just descend
				node = hit->second;
			}

			// bookkeeping shared by both cases
			tree[node].accessions.push_back(seqName);
			name2Taxonomy[seqName] = node;

			// the taxonomy string is exhausted, so this node is the sequence's leaf
			if (seqTaxonomy.empty()) { uniqueTaxonomies.insert(node); }
		}

		// save maxLevel for binning the unclassified seqs
		if (depth > maxLevel) { maxLevel = depth; }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "addSeqToTree");
		exit(1);
	}
}
// Returns the accessions stored on the node addressed by seqTaxonomy.
//
// The taxonomy is stripped of confidence scores and walked taxon by taxon from
// the root. The result is:
//   - the accessions of the final node when the whole string is matched;
//   - an empty vector when m->control_pressed is set, when getNextTaxon()
//     yields an empty taxon (malformed string), or when a taxon is missing
//     from the tree. In the missing-taxon case an error is reported and
//     m->control_pressed is set to true.
vector<string> PhyloTree::getSeqs(string seqTaxonomy){
	try {
		// untouched copy for messages; seqTaxonomy itself is modified below
		string originalTax = seqTaxonomy;
		vector<string> result;
		int node = 0;   // index into `tree`, starting at the root
		string taxon;

		m->removeConfidences(seqTaxonomy);

		while (!seqTaxonomy.empty()) {

			if (m->control_pressed) { return result; }

			taxon = getNextTaxon(seqTaxonomy, "");

			if (m->debug) { m->mothurOut(taxon +'\n'); }

			if (taxon.empty()) {
				m->mothurOut(originalTax + " has an error in the taxonomy. This may be due to a ;;");
				m->mothurOutEndLine();
				break;
			}

			map<string, int>::iterator hit = tree[node].children.find(taxon);

			// the requested taxonomy is not in the tree: report it and flag an abort
			if (hit == tree[node].children.end()) {
				m->mothurOut("[ERROR]: " + originalTax + " is not in taxonomy tree, please correct.");
				m->mothurOutEndLine();
				m->control_pressed = true;
				return result;
			}

			node = hit->second;

			// last taxon consumed: this node holds the requested accessions
			if (seqTaxonomy.empty()) { result = tree[node].accessions; }
		}

		return result;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "getSeqs");
		exit(1);
	}
}
int PhyloSummary::addSeqToTree(string seqTaxonomy, map<string, bool> containsGroup){ try { numSeqs++; map<string, int>::iterator childPointer; int currentNode = 0; string taxon; int level = 0; //are there confidence scores, if so remove them if (seqTaxonomy.find_first_of('(') != -1) { m->removeConfidences(seqTaxonomy); } while (seqTaxonomy != "") { level++; if (m->control_pressed) { return 0; } //somehow the parent is getting one too many accnos //use print to reassign the taxa id taxon = getNextTaxon(seqTaxonomy); childPointer = tree[currentNode].children.find(taxon); if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { if (itGroup->second == true) { tree[childPointer->second].groupCount[itGroup->first]++; } } tree[childPointer->second].total++; currentNode = childPointer->second; }else{ if (ignore) { tree.push_back(rawTaxNode(taxon)); int index = tree.size() - 1; tree[index].parent = currentNode; tree[index].level = level; tree[index].total = 1; tree[currentNode].children[taxon] = index; for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { if (itGroup->second == true) { tree[index].groupCount[itGroup->first]++; } } currentNode = index; }else{ //otherwise, error m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine(); break; } } } if (level > maxLevel) { maxLevel = level; } return 0; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "addSeqToTree"); exit(1); } }
// Adds the named sequence to the summary tree, updating per-group counts and
// totals on every node along its taxonomy path.
//
// seqName     - sequence identifier, used to look up its group (groupmap) or
//               its abundance and per-group counts (ct).
// seqTaxonomy - taxonomy string; confidence scores are removed only when the
//               string contains a '('.
//
// Counting rules, applied at each node on the path:
//   - groupmap != NULL: the sequence's group (if that node has an entry for it)
//     is incremented by 1 and the node total by 1. A "not found" group produces
//     a warning and only the total is affected.
//   - else ct != NULL: every group with a non-zero count for this sequence is
//     increased by that count (when ct->hasGroupInfo()), and the node total by
//     ct->getNumSeqs(seqName).
//   - neither: the node total is increased by 1.
//
// A taxon missing from the tree is created when `ignore` is set (its group
// counts are first zero-initialised for every known group); otherwise a warning
// is printed and the walk stops.
//
// Always returns 0. Returns early, without updating maxLevel, when
// m->control_pressed is set.
int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
	try {
		numSeqs++;

		map<string, int>::iterator childPointer;

		int currentNode = 0;
		string taxon;
		int level = 0;

		//are there confidence scores, if so remove them
		if (seqTaxonomy.find_first_of('(') != string::npos) { m->removeConfidences(seqTaxonomy); }

		while (seqTaxonomy != "") {
			level++;

			if (m->control_pressed) { return 0; }

			taxon = getNextTaxon(seqTaxonomy);

			// ---- step 1: find, or create, the node for this taxon ----
			childPointer = tree[currentNode].children.find(taxon);
			const bool isNewNode = (childPointer == tree[currentNode].children.end());

			int node = 0;
			if (!isNewNode) {
				node = childPointer->second;
			}
			else if (ignore) {
				tree.push_back(rawTaxNode(taxon));
				node = static_cast<int>(tree.size()) - 1;
				tree[node].parent = currentNode;
				tree[node].level = level;
				tree[currentNode].children[taxon] = node;
			}
			else {
				//otherwise, error
				m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file.");
				m->mothurOutEndLine();
				break;
			}

			// ---- step 2: credit this sequence to the node ----
			// `tree` is not resized again in this iteration, so the reference stays valid.
			map<string, int>& counts = tree[node].groupCount;
			int thisCount = 1;

			if (groupmap != NULL) {
				// a new node starts with a zero entry for every known group
				if (isNewNode) {
					vector<string> mGroups = groupmap->getNamesOfGroups();
					for (size_t j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; }
				}

				//find out the sequences group
				string group = groupmap->getGroup(seqName);

				if (group == "not found") {
					m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total.");
					m->mothurOutEndLine();
				}

				//if there is a count for this group, increment it - there should not be
				//a case where we can't find it since we load group in read
				map<string, int>::iterator itGroup = counts.find(group);
				if (itGroup != counts.end()) { itGroup->second++; }
			}
			else if (ct != NULL) {
				if (ct->hasGroupInfo()) {
					// one name list serves both the zero-initialisation and the increments
					// (assumes getNamesOfGroups() is a side-effect-free getter)
					vector<string> groups = ct->getNamesOfGroups();

					if (isNewNode) {
						for (size_t j = 0; j < groups.size(); j++) { counts[groups[j]] = 0; }
					}

					// groupCounts is indexed in parallel with groups
					vector<int> groupCounts = ct->getGroupCounts(seqName);

					for (size_t i = 0; i < groups.size(); i++) {
						if (groupCounts[i] != 0) {
							//only groups the node already tracks are updated
							map<string, int>::iterator itGroup = counts.find(groups[i]);
							if (itGroup != counts.end()) { itGroup->second += groupCounts[i]; }
						}
					}
				}
				thisCount = ct->getNumSeqs(seqName);
			}

			// a new node's total is assigned, an existing node's total is accumulated
			if (isNewNode)	{ tree[node].total = thisCount;  }
			else			{ tree[node].total += thisCount; }

			currentNode = node;
		}

		if (level > maxLevel) { maxLevel = level; }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloSummary", "addSeqToTree");
		exit(1);
	}
}