Пример #1
0
/**
 * Adds one sequence to the taxonomy tree, walking (and extending) the tree
 * one taxon at a time along seqTaxonomy.
 *
 * Side effects visible in this function:
 *   - numSeqs is incremented.
 *   - seqName is appended to the accessions of the root node and of every
 *     node visited or created along the path.
 *   - name2Taxonomy[seqName] ends up holding the deepest node reached.
 *   - uniqueTaxonomies receives the node at which the taxonomy string ran
 *     out (or, on an empty-taxon error, the last non-root node reached).
 *   - maxLevel is raised if this taxonomy needed more loop iterations than
 *     any earlier one.
 *
 * @param seqName      name of the sequence being added
 * @param seqTaxonomy  taxonomy string; confidence scores are stripped via
 *                     m->removeConfidences before parsing
 * @return always 0 (also when interrupted through m->control_pressed)
 *
 * Any std::exception is reported through m->errorOut and the process exits.
 */
int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
	try {
		numSeqs++;

		int nodeIndex = 0;	// index into tree; 0 is the root
		int depth = 0;		// number of taxa consumed from seqTaxonomy

		// every sequence is recorded on the root node
		tree[0].accessions.push_back(seqName);
		m->removeConfidences(seqTaxonomy);

		string taxon;
		map<string, int>::iterator found;

		// NOTE (carried over from the original author): "somehow the parent is
		// getting one too many accnos; use print to reassign the taxa id".
		while (seqTaxonomy != "") {

			depth++;

			if (m->control_pressed) { return 0; }

			// getNextTaxon also consumes the taxon from seqTaxonomy (the loop
			// condition relies on that)
			taxon = getNextTaxon(seqTaxonomy, seqName);

			if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); }

			// an empty taxon means the string is malformed: report it, remember
			// where we stopped (unless still at the root) and give up
			if (taxon == "") {
				m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;");
				m->mothurOutEndLine();
				if (nodeIndex != 0) { uniqueTaxonomies.insert(nodeIndex); }
				break;
			}

			found = tree[nodeIndex].children.find(taxon);

			if (found == tree[nodeIndex].children.end()) {
				// unknown child: append a new node and link it to its parent
				tree.push_back(TaxNode(taxon));
				numNodes++;

				const int newIndex = numNodes-1;
				tree[nodeIndex].children[taxon] = newIndex;
				tree[newIndex].parent = nodeIndex;

				nodeIndex = newIndex;
			}
			else {
				// known child: just descend
				nodeIndex = found->second;
			}

			// bookkeeping common to both cases
			tree[nodeIndex].accessions.push_back(seqName);
			name2Taxonomy[seqName] = nodeIndex;

			// taxonomy exhausted: this node is the sequence's full taxonomy
			if (seqTaxonomy == "") { uniqueTaxonomies.insert(nodeIndex); }
		}

		// save maxLevel for binning the unclassified seqs
		if (depth > maxLevel) { maxLevel = depth; }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "addSeqToTree");
		exit(1);
	}
}
Пример #2
0
/**
 * Looks up the node addressed by seqTaxonomy and returns the sequence names
 * (accessions) stored on it.
 *
 * The taxonomy is stripped of confidence scores and then followed taxon by
 * taxon from the root. The accessions are copied only once the whole string
 * has been consumed.
 *
 * @param seqTaxonomy  taxonomy string to resolve
 * @return the accessions of the addressed node. An empty vector is returned
 *         when the taxonomy is empty, when an empty taxon is encountered,
 *         when the taxonomy is not in the tree (in which case
 *         m->control_pressed is also set to true), or when
 *         m->control_pressed is already set.
 *
 * Any std::exception is reported through m->errorOut and the process exits.
 */
vector<string> PhyloTree::getSeqs(string seqTaxonomy){
	try {
		// keep the untouched input for messages; seqTaxonomy is consumed below
		const string originalTaxonomy = seqTaxonomy;
		vector<string> result;
		int nodeIndex = 0;	// start at the root

		m->removeConfidences(seqTaxonomy);

		while (seqTaxonomy != "") {

			if (m->control_pressed) { return result; }

			string taxon = getNextTaxon(seqTaxonomy, "");

			if (m->debug) { m->mothurOut(taxon +'\n'); }

			// malformed taxonomy: report and stop walking
			if (taxon == "") {
				m->mothurOut(originalTaxonomy + " has an error in the taxonomy.  This may be due to a ;;");
				m->mothurOutEndLine();
				break;
			}

			map<string, int>::iterator found = tree[nodeIndex].children.find(taxon);

			// taxon is not a child of the current node: flag the error and abort
			if (found == tree[nodeIndex].children.end()) {
				m->mothurOut("[ERROR]: " + originalTaxonomy + " is not in taxonomy tree, please correct.");
				m->mothurOutEndLine();
				m->control_pressed = true;
				return result;
			}

			nodeIndex = found->second;

			// whole taxonomy consumed: this is the requested node
			if (seqTaxonomy == "") { result = tree[nodeIndex].accessions; }
		}

		return result;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloTree", "getSeqs");
		exit(1);
	}
}
Пример #3
0
/**
 * Adds one taxonomy to the summary tree and updates the per-node totals.
 *
 * For every taxon along seqTaxonomy the matching child node is located. If
 * it does not exist and `ignore` is set, a new node is appended to the tree;
 * if it does not exist and `ignore` is not set, a warning is printed and the
 * walk stops. For every node reached, `total` is incremented by one and
 * groupCount[g] is incremented for each group g that containsGroup maps to
 * true.
 *
 * @param seqTaxonomy    taxonomy string; confidence scores are removed first
 *                       if the string contains a '('
 * @param containsGroup  group name -> whether this taxonomy is counted for
 *                       that group
 * @return always 0 (also when interrupted through m->control_pressed)
 *
 * Side effects: increments numSeqs; may raise maxLevel.
 * Any std::exception is reported through m->errorOut and the process exits.
 */
int PhyloSummary::addSeqToTree(string seqTaxonomy, map<string, bool> containsGroup){
	try {
		numSeqs++;

		int currentNode = 0;	// index into tree; start at the root
		int level = 0;

		//are there confidence scores, if so remove them
		if (seqTaxonomy.find_first_of('(') != string::npos) {  m->removeConfidences(seqTaxonomy);	}

		while (seqTaxonomy != "") {

			level++;

			if (m->control_pressed) { return 0; }

			// NOTE (carried over from the original author): "somehow the parent is
			// getting one too many accnos; use print to reassign the taxa id".
			string taxon = getNextTaxon(seqTaxonomy);

			map<string, int>::iterator childPointer = tree[currentNode].children.find(taxon);

			// Resolve the node this taxon maps to, creating it when allowed.
			int nodeIndex = -1;
			if (childPointer != tree[currentNode].children.end()) {
				//the node already exists
				nodeIndex = childPointer->second;
			}else if (ignore) {
				//unknown taxon, but we are allowed to extend the tree
				tree.push_back(rawTaxNode(taxon));
				nodeIndex = static_cast<int>(tree.size()) - 1;

				tree[nodeIndex].parent = currentNode;
				tree[nodeIndex].level = level;
				tree[nodeIndex].total = 0;	// incremented below, together with existing nodes
				tree[currentNode].children[taxon] = nodeIndex;
			}else{ //otherwise, error
				m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
				break;
			}

			// Update counts - identical for existing and freshly created nodes.
			for (map<string, bool>::const_iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); ++itGroup) {
				if (itGroup->second) {
					tree[nodeIndex].groupCount[itGroup->first]++;
				}
			}

			tree[nodeIndex].total++;

			currentNode = nodeIndex;
		}

		if (level > maxLevel) { maxLevel = level; }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloSummary", "addSeqToTree");
		exit(1);
	}
}
Пример #4
0
/**
 * Adds one named sequence to the summary tree and updates the per-node
 * overall and per-group totals.
 *
 * For every taxon along seqTaxonomy the matching child node is located. If
 * it does not exist and `ignore` is set, a new node is appended and its
 * groupCount entries are initialised to 0 for every known group; if it does
 * not exist and `ignore` is not set, a warning is printed and the walk stops.
 *
 * Counting, for every node reached:
 *   - groupmap != NULL: the sequence's group (groupmap->getGroup) is
 *     incremented by one if the node already has an entry for it; a
 *     "not found" group produces a warning and is only counted in `total`.
 *     `total` grows by 1.
 *   - else ct != NULL: if the count table has group info, each non-zero
 *     per-group count of the sequence is added to the node's matching
 *     groupCount entry. `total` grows by ct->getNumSeqs(seqName).
 *   - neither: `total` grows by 1.
 *
 * @param seqName      name of the sequence (used for group / count lookup)
 * @param seqTaxonomy  taxonomy string; confidence scores are removed first
 *                     if the string contains a '('
 * @return always 0 (also when interrupted through m->control_pressed)
 *
 * Side effects: increments numSeqs; may raise maxLevel.
 * Any std::exception is reported through m->errorOut and the process exits.
 */
int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
	try {

		numSeqs++;

		int currentNode = 0;	// index into tree; start at the root
		int level = 0;

		//are there confidence scores, if so remove them
		if (seqTaxonomy.find_first_of('(') != string::npos) {  m->removeConfidences(seqTaxonomy);	}

		while (seqTaxonomy != "") {

			level++;

			if (m->control_pressed) { return 0; }

			// NOTE (carried over from the original author): "somehow the parent is
			// getting one too many accnos; use print to reassign the taxa id".
			string taxon = getNextTaxon(seqTaxonomy);

			map<string, int>::iterator childPointer = tree[currentNode].children.find(taxon);

			// Resolve the node this taxon maps to, creating it when allowed.
			int nodeIndex = -1;
			if (childPointer != tree[currentNode].children.end()) {
				//the node already exists
				nodeIndex = childPointer->second;
			}else if (ignore) {
				//unknown taxon, but we are allowed to extend the tree
				tree.push_back(rawTaxNode(taxon));
				nodeIndex = static_cast<int>(tree.size()) - 1;

				tree[nodeIndex].parent = currentNode;
				tree[nodeIndex].level = level;
				tree[nodeIndex].total = 0;	// the sequence's count is added below
				tree[currentNode].children[taxon] = nodeIndex;

				//initialize groupcounts so the lookups below can find every known group
				if (groupmap != NULL) {
					vector<string> mGroups = groupmap->getNamesOfGroups();
					for (size_t j = 0; j < mGroups.size(); j++) {
						tree[nodeIndex].groupCount[mGroups[j]] = 0;
					}
				}else if (ct != NULL) {
					if (ct->hasGroupInfo()) {
						vector<string> mGroups = ct->getNamesOfGroups();
						for (size_t j = 0; j < mGroups.size(); j++) {
							tree[nodeIndex].groupCount[mGroups[j]] = 0;
						}
					}
				}
			}else{ //otherwise, error
				m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
				break;
			}

			// Update counts - identical for existing and freshly created nodes.
			int thisCount = 1;

			if (groupmap != NULL) {
				//find out the sequences group
				string group = groupmap->getGroup(seqName);

				if (group == "not found") {  m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }

				//do you have a count for this group?
				map<string, int>::iterator itGroup = tree[nodeIndex].groupCount.find(group);

				//if yes, increment it - there should not be a case where we can't find it since we load group in read
				if (itGroup != tree[nodeIndex].groupCount.end()) {
					itGroup->second++;
				}
			}else if (ct != NULL) {
				if (ct->hasGroupInfo()) {
					vector<int> groupCounts = ct->getGroupCounts(seqName);
					vector<string> groups = ct->getNamesOfGroups();

					// groupCounts is indexed in parallel with groups, as in the original code
					for (size_t i = 0; i < groups.size(); i++) {
						if (groupCounts[i] != 0) {
							//do you have a count for this group?
							map<string, int>::iterator itGroup = tree[nodeIndex].groupCount.find(groups[i]);

							//if yes, add to it - there should not be a case where we can't find it since we load group in read
							if (itGroup != tree[nodeIndex].groupCount.end()) {
								itGroup->second += groupCounts[i];
							}
						}
					}
				}
				// with a count table one name may stand for several sequences
				thisCount = ct->getNumSeqs(seqName);
			}

			tree[nodeIndex].total += thisCount;

			currentNode = nodeIndex;
		}

		if (level > maxLevel) { maxLevel = level; }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "PhyloSummary", "addSeqToTree");
		exit(1);
	}
}