Exemple #1
0
/**
 * Computes the weighted score for the group pair (groupA, groupB) on tree t.
 *
 * Denominator D: for every leaf listed in t->groupNodeInfo for groupA and then
 * for groupB, getLengthToRoot() for that leaf is weighted by the number of the
 * group's sequences at the leaf divided by the group's total count from the
 * tree's CountTable.
 *
 * Numerator (stored in WScore[groupA+groupB]): for every node, u is the node's
 * groupA share minus its groupB share (each pcount divided by the group total);
 * |u * branchLength| is added unless the branch length is -1 or, when
 * includeRoot is false, the node is recorded in rootForGrouping for this pair.
 *
 * @param t       tree to score
 * @param groupA  first group of the comparison
 * @param groupB  second group of the comparison
 * @return data holding a single value, numerator / D (0 if that is NaN or
 *         infinite). If m->control_pressed is set, data is returned as it
 *         stands at that moment (it was cleared on entry).
 */
EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
	try {

		data.clear(); //clear out old values

		CountTable* ct = t->getCountTable();

		if (m->control_pressed) { return data; }

		//initialize weighted score. References into a std::map stay valid while other
		//keys are inserted, so the entry is looked up once and updated in place.
		double& score = WScore[(groupA+groupB)];
		score = 0.0;
		double D = 0.0;

		vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);

		//adding the weighted sums from groupA's leaves, then from groupB's leaves
		for (size_t g = 0; g < groups.size(); g++) {
			string group = groups[g];

			for (size_t j = 0; j < t->groupNodeInfo[group].size(); j++) { //the leaf nodes that have seqs from this group
				int leaf = t->groupNodeInfo[group][j];

				//a leaf without an entry for the group contributes zero sequences
				//instead of dereferencing end()
				map<string, int>::iterator leafIt = t->tree[leaf].pcount.find(group);
				int numSeqsInGroup = 0;
				if (leafIt != t->tree[leaf].pcount.end()) { numSeqsInGroup = leafIt->second; }

				//still called for every leaf, exactly as before, in case it has side effects
				double sum = getLengthToRoot(t, leaf, groups[0], groups[1]);
				double weightedSum = ((numSeqsInGroup * sum) / (double)ct->getGroupCount(group));

				D += weightedSum;
			}
		}

		//calculate u for the group comb
		for (int i = 0; i < t->getNumNodes(); i++) {

			if (m->control_pressed) { return data; }

			//NOTE(review): `it` is not declared in this function - presumably a class member iterator.
			double u = 0.00;

			//does this node have descendants from groupA
			//if it does u = # of its descendants with a certain group / total number in tree with a certain group
			it = t->tree[i].pcount.find(groupA);
			if (it != t->tree[i].pcount.end()) {
				u = (double) it->second / (double) ct->getGroupCount(groupA);
			}

			//does this node have descendants from groupB
			//if it does subtract their percentage from u
			it = t->tree[i].pcount.find(groupB);
			if (it != t->tree[i].pcount.end()) {
				u -= (double) it->second / (double) ct->getGroupCount(groupB);
			}

			//with includeRoot every node counts; otherwise skip nodes recorded as root for this
			//grouping. The short-circuit keeps rootForGrouping untouched when includeRoot is set.
			bool countsTowardScore = includeRoot || (rootForGrouping[groups].count(i) == 0);

			//a branch length of -1 means the node is skipped
			if (countsTowardScore && (t->tree[i].getBranchLength() != -1)) {
				u = abs(u * t->tree[i].getBranchLength());
				score += u;
			}
		}
		/********************************************************/

		//calculate weighted score for the group combination
		double UN = (score / D);

		//D == 0 (or a bad branch length) gives NaN/inf; report 0 instead
		if (isnan(UN) || isinf(UN)) { UN = 0; }
		data.push_back(UN);

		return data;
	}
	catch(exception& e) {
		m->errorOut(e, "Weighted", "getValues");
		exit(1);
	}
}
Exemple #2
0
//**********************************************************************************************************************
int SplitGroupCommand::runCount(){
	try {
        
        CountTable ct;
        ct.readTable(countfile, true, false);
        if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
        
        if (m->control_pressed) { return 0; }
        
        vector<string> namesGroups = ct.getNamesOfGroups();
        SharedUtil util;  util.setGroups(Groups, namesGroups); 
        
        //fill filehandles with neccessary ofstreams
        map<string, string> ffiles; //group -> filename
        map<string, string> cfiles; //group -> filename
        for (int i=0; i<Groups.size(); i++) {
            ofstream ftemp, ctemp;
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
            variables["[group]"] = Groups[i];
            string newFasta = getOutputFileName("fasta",variables);
            outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
            ffiles[Groups[i]] = newFasta;
            m->openOutputFile(newFasta, ftemp); ftemp.close();
            
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
            string newCount = getOutputFileName("count",variables);
            outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
            cfiles[Groups[i]] = newCount;
            m->openOutputFile(newCount, ctemp);
            ctemp << "Representative_Sequence\ttotal\t" << Groups[i] << endl; ctemp.close();
        }
        
        ifstream in; 
        m->openInputFile(fastafile, in);
        
        while (!in.eof()) {
            Sequence seq(in); m->gobble(in);
            
            if (m->control_pressed) { break; }
            if (seq.getName() != "") {
                vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
                for (int i = 0; i < thisSeqsGroups.size(); i++) {
                    if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print
                        ofstream outf, outc;
                        m->openOutputFileAppend(ffiles[thisSeqsGroups[i]], outf);
                        seq.printSequence(outf); outf.close();
                        int numSeqs = ct.getGroupCount(seq.getName(), thisSeqsGroups[i]);
                        m->openOutputFileAppend(cfiles[thisSeqsGroups[i]], outc);
                        outc << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl; outc.close();
                    }
                }
            }
        }
        in.close();
        
        return 0;

    }
	catch(exception& e) {
		m->errorOut(e, "SplitGroupCommand", "runCount");
		exit(1);
	}
}
//**********************************************************************************************************************
/**
 * Removes rare bins from a list file and writes the surviving bins to a new
 * list file, together with a matching group file (if groupfile is set) or
 * count file (if countfile is set).
 *
 * Only one label of the list file is processed: the first one, or the single
 * label selected from `labels` (falling back to the preceding label when the
 * requested one is not present).
 *
 * Bin size is the number of names in the bin, restricted to the user's Groups
 * when a group file is given, or the summed sequence counts from the count
 * table when a count file is given. A bin is kept only when its size is
 * greater than nseqs.
 *
 * The ListVector and GroupMap allocated here are released before returning.
 *
 * @return always 0; output file names are recorded in outputNames/outputTypes.
 */
int RemoveRareCommand::processList(){
	try {
				
		//you must provide a label because the names in the listfile need to be consistent
		string thisLabel = "";
		if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
		else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
		else if (labels.size() == 1) { thisLabel = *labels.begin(); }
		//with no labels at all thisLabel stays empty and the first label in the file is used
		
		InputData input(listfile, "list");
		ListVector* list = input.getListVector();
		
		//get first one or the one we want (nothing to search if the file produced no list)
		if ((thisLabel != "") && (list != NULL)) { 	
			//use smart distancing
			set<string> userLabels; userLabels.insert(thisLabel);
			set<string> processedLabels;
			string lastLabel = list->getLabel();
			while((list != NULL) && (userLabels.size() != 0)) {
				if(userLabels.count(list->getLabel()) == 1){
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					break;
				}
				
				if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					delete list;
					list = input.getListVector(lastLabel);
					break;
				}
				lastLabel = list->getLabel();
				delete list;
				list = input.getListVector();
			}
			if (userLabels.size() != 0) { 
				m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
				//the loop can leave through the anyLabelsToProcess branch with a live list; free it before replacing it
				if (list != NULL) { delete list; }
				list = input.getListVector(lastLabel); 
			}
		}
        
        string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
        variables["[extension]"] = m->getExtension(listfile);
        //list may be NULL here (it is checked again below), so only read the label when it exists
        string tag = "";
        if (list != NULL) { tag = list->getLabel(); }
        variables["[tag]"] = tag;
		string outputFileName = getOutputFileName("list", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
        variables["[extension]"] = m->getExtension(groupfile);
		string outputGroupFileName = getOutputFileName("group", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
        variables["[extension]"] = m->getExtension(countfile);
        string outputCountFileName = getOutputFileName("count", variables);
        
		ofstream out, outGroup;
		m->openOutputFile(outputFileName, out);
		
		bool wroteSomething = false;

		
		//if groupfile is given then use it
		GroupMap* groupMap = NULL; //allocated only when groupfile is set
        CountTable ct;
		if (groupfile != "") { 
			groupMap = new GroupMap(groupfile); groupMap->readMap(); 
			SharedUtil util;
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			util.setGroups(Groups, namesGroups);
			m->openOutputFile(outputGroupFileName, outGroup);
		}else if (countfile != "") {
            ct.readTable(countfile, true, false);
            if (ct.hasGroupInfo()) {
                vector<string> namesGroups = ct.getNamesOfGroups();
                SharedUtil util;
                util.setGroups(Groups, namesGroups);
            }
        }
		
		
		if (list != NULL) {
            
            vector<string> binLabels = list->getLabels();
            vector<string> newLabels;
            
			//make a new list vector
			ListVector newList;
			newList.setLabel(list->getLabel());
			
			//for each bin
			for (int i = 0; i < list->getNumBins(); i++) {
				//on abort: release what we own, remove the partial outputs and stop
				if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  delete list;  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list->get(i);
				vector<string> names;
				string saveBinNames = binnames;
				m->splitAtComma(binnames, names);
                int binsize = names.size();
				
				//group file mode: "name<TAB>group" lines to write if the bin is kept
				//count file mode: names to drop from the count table if the bin is kept
				vector<string> newGroupFile;
				if (groupfile != "") {
					vector<string> newNames;
					saveBinNames = "";
					for(int k = 0; k < names.size(); k++) {
						string group = groupMap->getGroup(names[k]);
						
						if (m->inUsersGroups(group, Groups)) {
							newGroupFile.push_back(names[k] + "\t" + group); 
								
							newNames.push_back(names[k]);	
							saveBinNames += names[k] + ",";
						}
					}
					names = newNames; binsize = names.size();
					//strip the trailing comma (an empty string stays empty)
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
				}else if (countfile != "") {
					saveBinNames = "";
                    binsize = 0;
					for(int k = 0; k < names.size(); k++) {
                        if (ct.hasGroupInfo()) {
                            vector<string> thisSeqsGroups = ct.getGroups(names[k]);
                            
                            int thisSeqsCount = 0;
                            for (int n = 0; n < thisSeqsGroups.size(); n++) {
                                if (m->inUsersGroups(thisSeqsGroups[n], Groups)) {
                                    thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]);
                                }
                            }
                            binsize += thisSeqsCount;
                            //if you don't have any seqs from the groups the user wants, then remove you.
                            if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); }
                            else { saveBinNames += names[k] + ","; }
                        }else {
                            binsize += ct.getNumSeqs(names[k]); 
                            saveBinNames += names[k] + ",";
                        }
					}
					//strip the trailing comma (an empty string stays empty)
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
                }

				if (binsize > nseqs) { //keep bin
					newList.push_back(saveBinNames);
                    newLabels.push_back(binLabels[i]);
					if (groupfile != "") {  for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }  }
                    else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) {  ct.remove(newGroupFile[k]); } }  
				}else {  if (countfile != "") {  for(int k = 0; k < names.size(); k++) {  ct.remove(names[k]); } }  } //rare bin: drop all of its names from the count table
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.setLabels(newLabels);
                newList.printHeaders(out);
                newList.print(out);
			}
		}	
		
		out.close();
		
		//the list is owned by this function (it is deleted in the label search above as well)
		if (list != NULL) { delete list; list = NULL; }
		
		if (groupfile != "") { outGroup.close(); delete groupMap; groupMap = NULL; outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
        if (countfile != "") { 
            if (ct.hasGroupInfo()) {
                //drop the groups the user did not ask for before printing the table
                vector<string> allGroups = ct.getNamesOfGroups();
                for (int i = 0; i < allGroups.size(); i++) {
                    if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); }
                }

            }
            ct.printTable(outputCountFileName);
            outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); 
        }
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveRareCommand", "processList");
		exit(1);
	}
}