Пример #1
0
int SensSpecCommand::processColumn(){
	try{		
		string origCutoff = "";
		bool getCutoff = 0;
		if(cutoff == -1.00)	{	getCutoff = 1;															}
		else				{	origCutoff = toString(cutoff);	cutoff += (0.49 / double(precision));	}
		
		set<string> seqPairSet;
		int numSeqs = 0;
		
		InputData input(listFile, "list");
		ListVector* list = input.getListVector();
		string lastLabel = list->getLabel();
		
		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
		set<string> processedLabels;
		set<string> userLabels = labels;
		
		
		while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
			
			if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);  }  delete list;  return 0;  }
			
			if(allLines == 1 || labels.count(list->getLabel()) == 1){			
				
				processedLabels.insert(list->getLabel());
				userLabels.erase(list->getLabel());
				
				//process
				numSeqs = fillSeqPairSet(seqPairSet, list);
				process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
			}
			
			if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
				string saveLabel = list->getLabel();
				
				delete list;
				list = input.getListVector(lastLabel);
				
				processedLabels.insert(list->getLabel());
				userLabels.erase(list->getLabel());
				
				//process
				numSeqs = fillSeqPairSet(seqPairSet, list);
				process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
				
				//restore real lastlabel to save below
				list->setLabel(saveLabel);
			}		
			
			lastLabel = list->getLabel();			
			
			delete list;
			list = input.getListVector();
		}
		
		
		//output error messages about any remaining user labels
		set<string>::iterator it;
		bool needToRun = false;
		for (it = userLabels.begin(); it != userLabels.end(); it++) {  
			m->mothurOut("Your file does not include the label " + *it); 
			if (processedLabels.count(lastLabel) != 1) {
				m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
				needToRun = true;
			}else {
				m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
			}
		}
		
		//run last label if you need to
		if (needToRun == true)  {
			if (list != NULL) {	delete list;	}
			list = input.getListVector(lastLabel);
			
			//process
			numSeqs = fillSeqPairSet(seqPairSet, list);
			delete list;
			process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs);
		}
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SensSpecCommand", "processColumn");
		exit(1);
	}
}
Пример #2
0
string SensSpecCommand::preProcessList(){
    try {
        set<string> uniqueNames;
        //get unique names from distance file
        if (format == "phylip") {
            
            ifstream phylipFile;
            m->openInputFile(distFile, phylipFile);
            string numTest;
            int pNumSeqs;
			phylipFile >> numTest; m->gobble(phylipFile);
			
			if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
            else {
                m->mothurConvert(numTest, pNumSeqs);
            }
            
            string seqName;
            for(int i=0;i<pNumSeqs;i++){
                if (m->control_pressed) { return ""; }
                phylipFile >> seqName;  m->getline(phylipFile);  m->gobble(phylipFile);
                uniqueNames.insert(seqName);
            }
            phylipFile.close();
        }else {
            ifstream columnFile;
            m->openInputFile(distFile, columnFile);
            string seqNameA, seqNameB;
            double distance;
            
            while(columnFile){
                if (m->control_pressed) { return ""; }
                columnFile >> seqNameA >> seqNameB >> distance;
                uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB);
                m->gobble(columnFile);
            }
            columnFile.close();
        }
        
        //read list file, if numSeqs > unique names then remove redundant names
        string newListFile = listFile + ".temp";
        ofstream out;
        m->openOutputFile(newListFile, out);
        ifstream in;
		m->openInputFile(listFile, in);
		
		bool wroteSomething = false;
		
		while(!in.eof()){
			
			if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
            
			//read in list vector
			ListVector list(in);
            
            //listfile is already unique
            if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile);  return ""; }
			
			//make a new list vector
			ListVector newList;
			newList.setLabel(list.getLabel());
			
			//for each bin
			for (int i = 0; i < list.getNumBins(); i++) {
                
				//parse out names that are in accnos file
				string binnames = list.get(i);
                vector<string> bnames;
                m->splitAtComma(binnames, bnames);
				
				string newNames = "";
                for (int j = 0; j < bnames.size(); j++) {
					string name = bnames[j];
					//if that name is in the .accnos file, add it
					if (uniqueNames.count(name) != 0) {  newNames += name + ",";  }
				}
                
				//if there are names in this bin add to new list
				if (newNames != "") { 
					newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
					newList.push_back(newNames);	
				}
			}
            
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.print(out);
			}
			
			m->gobble(in);
		}
		in.close();	
		out.close();

        if (wroteSomething) { return newListFile; }
        else { m->mothurRemove(newListFile); }
        
        return ""; 
    }
Пример #3
0
//**********************************************************************************************************************
int GetRAbundCommand::processList(ofstream& out){
	try {
        CountTable ct;
        ct.readTable(countfile, false, false);
        
        InputData input(inputfile, format);
        ListVector* list = input.getListVector();
        string lastLabel = list->getLabel();
        
        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
        set<string> processedLabels;
        set<string> userLabels = labels;
        
        if (m->control_pressed) {  delete list;  return 0; }
        
        while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            
            if(allLines == 1 || labels.count(list->getLabel()) == 1){
                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                
                if (m->control_pressed) {   delete list;  return 0;  }
                
                RAbundVector* rabund = new RAbundVector();
                createRabund(ct, list, rabund);
                
                if(sorted)	{   rabund->print(out);				}
                else		{	rabund->nonSortedPrint(out);	}
                
                delete rabund;
                processedLabels.insert(list->getLabel());
                userLabels.erase(list->getLabel());
            }
            
            if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = list->getLabel();
                
                delete list;
                list = input.getListVector(lastLabel);
                
                m->mothurOut(list->getLabel()); m->mothurOutEndLine();
                
                if (m->control_pressed) {    delete list;  return 0;  }
                
                RAbundVector* rabund = new RAbundVector();
                createRabund(ct, list, rabund);
                
                if(sorted)	{   rabund->print(out);				}
                else		{	rabund->nonSortedPrint(out);	}
                
                delete rabund;
                processedLabels.insert(list->getLabel());
                userLabels.erase(list->getLabel());
                
                //restore real lastlabel to save below
                list->setLabel(saveLabel);
            }
            
            lastLabel = list->getLabel();
            
            delete list;
            list = input.getListVector();
        }
        
        //output error messages about any remaining user labels
        set<string>::iterator it;
        bool needToRun = false;
        for (it = userLabels.begin(); it != userLabels.end(); it++) {
            m->mothurOut("Your file does not include the label " + *it);
            if (processedLabels.count(lastLabel) != 1) {
                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
                needToRun = true;
            }else {
                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
            }
        }
        
        //run last label if you need to
        if (needToRun == true)  {
            if (list != NULL) {	delete list;	}
            list = input.getListVector(lastLabel);
            
            m->mothurOut(list->getLabel()); m->mothurOutEndLine();
            
            if (m->control_pressed) {   delete list;  return 0; }
            
            RAbundVector* rabund = new RAbundVector();
            createRabund(ct, list, rabund);
            
            if(sorted)	{   rabund->print(out);				}
            else		{	rabund->nonSortedPrint(out);	}
            
            delete rabund;
            delete list;
        }

        return 0;
    }
	catch(exception& e) {
		m->errorOut(e, "GetRAbundCommand", "processList");
		exit(1);
	}
}
Пример #4
0
//**********************************************************************************************************************
//returns a vector of listVectors where "little" vector is first
vector<ListVector> OtuHierarchyCommand::getListVectors() {
	try {
		
		int pos; //to use in smart distancing, position of last read in file
		int lastPos;
		vector<ListVector> lists;
		
		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
		set<string> processedLabels;
		set<string> userLabels = labels;

		//open file
		ifstream in;
		m->openInputFile(listFile, in);
		
		//get first list vector in file
		ListVector* list = NULL;
		string lastLabel = "";
		if (!in.eof())	{
			pos = in.tellg();
			lastPos = pos;
			list = new ListVector(in);  
			m->gobble(in);
			lastLabel = list->getLabel();
		}
		
		while ((list != NULL) && (userLabels.size() != 0)) {
		
			if (m->control_pressed) {  in.close(); delete list;  return lists; }
			
			//is this a listvector that we want?
			if(labels.count(list->getLabel()) == 1){
				
				//make copy of listvector
				ListVector temp(*list);
				lists.push_back(temp);
			
				processedLabels.insert(list->getLabel());
				userLabels.erase(list->getLabel());
			}
		
			//you have a label the user want that is smaller than this label and the last label has not already been processed 
			if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
				string saveLabel = list->getLabel();
				int savePos = in.tellg();
				
				//get smart distance line
				delete list;
				in.seekg(lastPos);
				if (!in.eof())	{	
					list = new ListVector(in);  
				}else { list = NULL; }
				
				//make copy of listvector
				ListVector temp(*list);
				lists.push_back(temp);
					
				processedLabels.insert(list->getLabel());
				userLabels.erase(list->getLabel());					
										
				//restore real lastlabel to save below
				list->setLabel(saveLabel);
				in.seekg(savePos);
			}
			
			lastLabel = list->getLabel();
			lastPos = pos;
			
			//get next line
			delete list;
			if (!in.eof())	{	
				pos = in.tellg();
				list = new ListVector(in);  
				m->gobble(in);
			}else { list = NULL; }
		}
		
		if (m->control_pressed) { in.close();  return lists; }				
		
		//output error messages about any remaining user labels
		set<string>::iterator it;
		bool needToRun = false;
		for (it = userLabels.begin(); it != userLabels.end(); it++) {  
			m->mothurOut("Your file does not include the label " + *it); 
			if (processedLabels.count(lastLabel) != 1) {
				m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
				needToRun = true;
			}else {
				m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
			}
		}
		
		if (m->control_pressed) {  in.close(); return lists; }
		
		//run last label if you need to
		if (needToRun == true)  {
			if (list != NULL) {	delete list;	}
			
			in.seekg(lastPos);
			if (!in.eof())	{	
				list = new ListVector(in); 
				
				//make copy of listvector
				ListVector temp(*list);
				lists.push_back(temp);
				
				delete list;
			}
		}
		
		in.close();
		return lists;
	}
	catch(exception& e) {
		m->errorOut(e, "OtuHierarchyCommand", "getListVectors");
		exit(1);
	}
}
Пример #5
0
//**********************************************************************************************************************
int RemoveRareCommand::processList(){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
		string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" +  m->getExtension(groupfile);
		
		ofstream out, outGroup;
		m->openOutputFile(outputFileName, out);
		
		bool wroteSomething = false;
		
		//you must provide a label because the names in the listfile need to be consistent
		string thisLabel = "";
		if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
		else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
		else { thisLabel = *labels.begin(); }
		
		InputData input(listfile, "list");
		ListVector* list = input.getListVector();
		
		//get first one or the one we want
		if (thisLabel != "") { 	
			//use smart distancing
			set<string> userLabels; userLabels.insert(thisLabel);
			set<string> processedLabels;
			string lastLabel = list->getLabel();
			while((list != NULL) && (userLabels.size() != 0)) {
				if(userLabels.count(list->getLabel()) == 1){
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					break;
				}
				
				if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					delete list;
					list = input.getListVector(lastLabel);
					break;
				}
				lastLabel = list->getLabel();
				delete list;
				list = input.getListVector();
			}
			if (userLabels.size() != 0) { 
				m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
				list = input.getListVector(lastLabel); 
			}
		}
		
		//if groupfile is given then use it
		GroupMap* groupMap;
		if (groupfile != "") { 
			groupMap = new GroupMap(groupfile); groupMap->readMap(); 
			SharedUtil util;
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			util.setGroups(Groups, namesGroups);
			m->openOutputFile(outputGroupFileName, outGroup);
		}
		
		
		if (list != NULL) {	
			//make a new list vector
			ListVector newList;
			newList.setLabel(list->getLabel());
			
			//for each bin
			for (int i = 0; i < list->getNumBins(); i++) {
				if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list->get(i);
				vector<string> names;
				string saveBinNames = binnames;
				m->splitAtComma(binnames, names);
				
				vector<string> newGroupFile;
				if (groupfile != "") {
					vector<string> newNames;
					saveBinNames = "";
					for(int k = 0; k < names.size(); k++) {
						string group = groupMap->getGroup(names[k]);
						
						if (m->inUsersGroups(group, Groups)) {
							newGroupFile.push_back(names[k] + "\t" + group); 
								
							newNames.push_back(names[k]);	
							saveBinNames += names[k] + ",";
						}
					}
					names = newNames;
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
				}

				if (names.size() > nseqs) { //keep bin
					newList.push_back(saveBinNames);
					for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }
				}
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.print(out);
			}
		}	
		
		out.close();
		if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveRareCommand", "processList");
		exit(1);
	}
}
Пример #6
0
//**********************************************************************************************************************
int GetSeqsCommand::readList(){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
        map<string, string> variables; 
		variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
        variables["[extension]"] = m->getExtension(listfile);
		
		ifstream in;
		m->openInputFile(listfile, in);
		
		bool wroteSomething = false;
		int selectedCount = 0;
        
        if (m->debug) { set<string> temp; sanity["list"] = temp; }
		
		while(!in.eof()){
			
			selectedCount = 0;

			//read in list vector
			ListVector list(in);
			
			//make a new list vector
			ListVector newList;
			newList.setLabel(list.getLabel());
            
            variables["[distance]"] = list.getLabel();
            string outputFileName = getOutputFileName("list", variables);
			
			ofstream out;
			m->openOutputFile(outputFileName, out);
			outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
            
            vector<string> binLabels = list.getLabels();
            vector<string> newBinLabels;
            
            if (m->control_pressed) { in.close(); out.close();  return 0; }
			
			//for each bin
			for (int i = 0; i < list.getNumBins(); i++) {
			
				//parse out names that are in accnos file
				string binnames = list.get(i);
                vector<string> bnames;
                m->splitAtComma(binnames, bnames);
				
				string newNames = "";
                for (int j = 0; j < bnames.size(); j++) {
					string name = bnames[j];
					//if that name is in the .accnos file, add it
					if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; if (m->debug) { sanity["list"].insert(name); } }
				}
			
				//if there are names in this bin add to new list
				if (newNames != "") { 
					newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
					newList.push_back(newNames);
                    newBinLabels.push_back(binLabels[i]);
				}
			}
				
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.setLabels(newBinLabels);
                newList.printHeaders(out);
				newList.print(out);
			}
			
			m->gobble(in);
            out.close();
		}
		in.close();	
		
		
		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
		
		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
		
		return 0;

	}
	catch(exception& e) {
		m->errorOut(e, "GetSeqsCommand", "readList");
		exit(1);
	}
}
Пример #7
0
//**********************************************************************************************************************
int RemoveGroupsCommand::readList(){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
		map<string, string> variables; 
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
        variables["[extension]"] = m->getExtension(listfile);
				
		ifstream in;
		m->openInputFile(listfile, in);
		
		bool wroteSomething = false;
		int removedCount = 0;
		
		while(!in.eof()){
			
			removedCount = 0;
			
			//read in list vector
			ListVector list(in);
            
            variables["[tag]"] = list.getLabel();
            string outputFileName = getOutputFileName("list", variables);
			
			ofstream out;
			m->openOutputFile(outputFileName, out);
			outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
            
            vector<string> binLabels = list.getLabels();
            vector<string> newBinLabels;
			
			//make a new list vector
			ListVector newList;
			newList.setLabel(list.getLabel());
			
			//for each bin
			for (int i = 0; i < list.getNumBins(); i++) {
				if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list.get(i);
				
				string newNames = "";
				while (binnames.find_first_of(',') != -1) { 
					string name = binnames.substr(0,binnames.find_first_of(','));
					binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
					
					//if that name is in the .accnos file, add it
					if (names.count(name) == 0) {  newNames += name + ",";  }
					else {
						//if you are not in the accnos file check if you are a name that needs to be changed
						map<string, string>::iterator it = uniqueToRedundant.find(name);
						if (it != uniqueToRedundant.end()) {
							newNames += it->second + ",";
						}else { removedCount++; }
					}
				}
				
				//get last name
				if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
				else { //if you are not in the accnos file check if you are a name that needs to be changed
					map<string, string>::iterator it = uniqueToRedundant.find(binnames);
					if (it != uniqueToRedundant.end()) {
						newNames += it->second + ",";
					}else { removedCount++; }
				}
				
				//if there are names in this bin add to new list
				if (newNames != "") {  
					newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
					newList.push_back(newNames);
                    newBinLabels.push_back(binLabels[i]);
				}
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.setLabels(newBinLabels);
                newList.printHeaders(out);
				newList.print(out);
			}
			
			m->gobble(in);
            out.close();
		}
		in.close();	
		
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
		m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
		
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveGroupsCommand", "readList");
		exit(1);
	}
}
Пример #8
0
//**********************************************************************************************************************
int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
	try {
		
		//make a new list vector
		ListVector newList;
		newList.setLabel(list->getLabel());
		
		int numOtus = 0;
		//for each bin
        vector<string> binLabels = list->getLabels();
        vector<string> newBinLabels;
		for (int i = 0; i < list->getNumBins(); i++) {
			if (m->control_pressed) { return 0; }
			
			//parse out names that are in accnos file
			string binnames = list->get(i);
			
			bool keepBin = false;
			string groupFileOutput = "";
			
			//parse names
			string individual = "";
			int length = binnames.length();
			for(int j=0;j<length;j++){
				if(binnames[j] == ','){
					string group = groupMap->getGroup(individual);
					if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
					
					if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
					groupFileOutput += individual + "\t" + group + "\n";
					individual = "";	
					
				}
				else{  individual += binnames[j];  }
			}
			
			string group = groupMap->getGroup(individual);
			if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
			
			if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
			groupFileOutput += individual + "\t" + group + "\n";
			
			//if there are sequences from the groups we want in this bin add to new list, output to groupfile
			if (keepBin) {  
				newList.push_back(binnames);
                newBinLabels.push_back(binLabels[i]);
				outGroup << groupFileOutput;
				numOtus++;
			}
		}
		
		//print new listvector
		if (newList.getNumBins() != 0) {
			wroteSomething = true;
			newList.setLabels(newBinLabels);
            newList.printHeaders(out);
            newList.print(out);
		}
		
		m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
	
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "GetOtusCommand", "processList");
		exit(1);
	}
}
Пример #9
0
//**********************************************************************************************************************
int GetOtusCommand::readListGroup(){
	try {
		InputData* input = new InputData(listfile, "list");
		ListVector* list = input->getListVector();
		string lastLabel = list->getLabel();
		
		//using first label seen if none is provided
		if (label == "") { label = lastLabel; }
        
        string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
        variables["[tag]"] = label;
        variables["[extension]"] = m->getExtension(listfile);
		string outputFileName = getOutputFileName("list", variables);
		
		ofstream out;
		m->openOutputFile(outputFileName, out);
        
        string GroupOutputDir = outputDir;
		if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
        variables["[filename]"] = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile));
        variables["[extension]"] = m->getExtension(groupfile);
		string outputGroupFileName = getOutputFileName("group", variables);
		
		ofstream outGroup;
		m->openOutputFile(outputGroupFileName, outGroup);

		
		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
		set<string> labels; labels.insert(label);
		set<string> processedLabels;
		set<string> userLabels = labels;
		
		bool wroteSomething = false;

		//as long as you are not at the end of the file or done wih the lines you want
		while((list != NULL) && (userLabels.size() != 0)) {
			
			if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); m->mothurRemove(outputFileName);  m->mothurRemove(outputGroupFileName);return 0;  }
			
			if(labels.count(list->getLabel()) == 1){
				processList(list, groupMap, out, outGroup, wroteSomething);
				
				processedLabels.insert(list->getLabel());
				userLabels.erase(list->getLabel());
			}
			
			if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
				string saveLabel = list->getLabel();
				
				delete list; 
				
				list = input->getListVector(lastLabel);
				
				processList(list, groupMap, out, outGroup, wroteSomething);
				
				processedLabels.insert(list->getLabel());
				userLabels.erase(list->getLabel());
				
				//restore real lastlabel to save below
				list->setLabel(saveLabel);
			}
			
			lastLabel = list->getLabel();
			
			delete list; list = NULL;
			
			//get next line to process
			list = input->getListVector();				
		}
		
		
		if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); m->mothurRemove(outputFileName);  m->mothurRemove(outputGroupFileName); return 0;  }
		
		//output error messages about any remaining user labels
		set<string>::iterator it;
		bool needToRun = false;
		for (it = userLabels.begin(); it != userLabels.end(); it++) {  
			m->mothurOut("Your file does not include the label " + *it); 
			if (processedLabels.count(lastLabel) != 1) {
				m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
				needToRun = true;
			}else {
				m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
			}
		}
		
		//run last label if you need to
		if (needToRun == true)  {
			if (list != NULL) { delete list; }
			
			list = input->getListVector(lastLabel);
			
			processList(list, groupMap, out, outGroup, wroteSomething);
			
			delete list; list = NULL;
		}
					
		out.close();
		outGroup.close();
		
		if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file does NOT contain any otus containing sequences from the groups you wish to get."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
		
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "GetOtusCommand", "readList");
		exit(1);
	}
}
Пример #10
0
//**********************************************************************************************************************
int MGClusterCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//read names file
		if (namefile != "") {
			nameMap = new NameAssignment(namefile);
			nameMap->readMap();
		}else{ nameMap= new NameAssignment(); }
		
		string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
		string tag = "";
		time_t start;
		float previousDist = 0.00000;
		float rndPreviousDist = 0.00000;
		
		//read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
		//must remember to delete those objects here since readBlast does not
		read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
		read->read(nameMap);
		
		list = new ListVector(nameMap->getListVector());
		RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
		
		if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
		
		start = time(NULL);
		oldList = *list;
		map<string, int> Seq2Bin;
		map<string, int> oldSeq2Bin;
		
		if (method == "furthest")		{ tag = "fn";  }
		else if (method == "nearest")	{ tag = "nn";  }
		else							{ tag = "an";  }
		
		//open output files
		m->openOutputFile(fileroot+ tag + ".list",  listFile);
		m->openOutputFile(fileroot+ tag + ".rabund",  rabundFile);
		m->openOutputFile(fileroot+ tag + ".sabund",  sabundFile);
		
		if (m->control_pressed) { 
			delete nameMap; delete read; delete list; delete rabund; 
			listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
			outputTypes.clear();
			return 0; 
		}
		
		double saveCutoff = cutoff;
		
		if (!hclusterWanted) {
			//get distmatrix and overlap
			SparseMatrix* distMatrix = read->getDistMatrix();
			overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
			delete read;
		
			//create cluster
			if (method == "furthest")	{	cluster = new CompleteLinkage(rabund, list, distMatrix, cutoff, method); }
			else if(method == "nearest"){	cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
			else if(method == "average"){	cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method);	}
			cluster->setMapWanted(true);
			Seq2Bin = cluster->getSeqtoBin();
			oldSeq2Bin = Seq2Bin;
			
			if (m->control_pressed) { 
				delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
				listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
				outputTypes.clear();
				return 0; 
			}
		
			//cluster using cluster classes
			while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
				
				cluster->update(cutoff);
				
				if (m->control_pressed) { 
					delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
					listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
					outputTypes.clear();
					return 0; 
				}
				
				float dist = distMatrix->getSmallDist();
				float rndDist;
				if (hard) {
					rndDist = m->ceilDist(dist, precision); 
				}else{
					rndDist = m->roundDist(dist, precision); 
				}
				
				if(previousDist <= 0.0000 && dist != previousDist){
					oldList.setLabel("unique");
					printData(&oldList);
				}
				else if(rndDist != rndPreviousDist){
					if (merge) {
						ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
						
						if (m->control_pressed) { 
							delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
							listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
							outputTypes.clear();
							return 0; 
						}
						
						temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
						printData(temp);
						delete temp;
					}else{
						oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
						printData(&oldList);
					}
				}
	
				previousDist = dist;
				rndPreviousDist = rndDist;
				oldList = *list;
				Seq2Bin = cluster->getSeqtoBin();
				oldSeq2Bin = Seq2Bin;
			}
			
			if(previousDist <= 0.0000){
				oldList.setLabel("unique");
				printData(&oldList);
			}
			else if(rndPreviousDist<cutoff){
				if (merge) {
					ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
					
					if (m->control_pressed) { 
							delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
							listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
							outputTypes.clear();
							return 0; 
					}
					
					temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
					printData(temp);
					delete temp;
				}else{
					oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
					printData(&oldList);
				}
			}
			
			//free memory
			overlapMatrix.clear();
			delete distMatrix;
			delete cluster;
			
		}else { //use hcluster to cluster
			//get distmatrix and overlap
			overlapFile = read->getOverlapFile();
			distFile = read->getDistFile(); 
			delete read;
		
			//sort the distance and overlap files
			sortHclusterFiles(distFile, overlapFile);
			
			if (m->control_pressed) { 
				delete nameMap;  delete list; delete rabund; 
				listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
				outputTypes.clear();
				return 0; 
			}
		
			//create cluster
			hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
			hcluster->setMapWanted(true);
			Seq2Bin = cluster->getSeqtoBin();
			oldSeq2Bin = Seq2Bin;
			
			vector<seqDist> seqs; seqs.resize(1); // to start loop
			//ifstream inHcluster;
			//m->openInputFile(distFile, inHcluster);
			
			if (m->control_pressed) { 
				delete nameMap;  delete list; delete rabund; delete hcluster;
				listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
				outputTypes.clear();
				return 0; 
			}

			while (seqs.size() != 0){
		
				seqs = hcluster->getSeqs();
				
				//to account for cutoff change in average neighbor
				if (seqs.size() != 0) {
					if (seqs[0].dist > cutoff) { break; }
				}
				
				if (m->control_pressed) { 
					delete nameMap;  delete list; delete rabund; delete hcluster;
					listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
					m->mothurRemove(distFile);
					m->mothurRemove(overlapFile);
					outputTypes.clear();
					return 0; 
				}
				
				for (int i = 0; i < seqs.size(); i++) {  //-1 means skip me
					
					if (seqs[i].seq1 != seqs[i].seq2) {
		
						cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
						
						if (m->control_pressed) { 
							delete nameMap;  delete list; delete rabund; delete hcluster;
							listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
							m->mothurRemove(distFile);
							m->mothurRemove(overlapFile);
							outputTypes.clear();
							return 0; 
						}
	
						float rndDist;
						if (hard) {
							rndDist = m->ceilDist(seqs[i].dist, precision); 
						}else{
							rndDist = m->roundDist(seqs[i].dist, precision); 
						}
												
						if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
							oldList.setLabel("unique");
							printData(&oldList);
						}
						else if((rndDist != rndPreviousDist)){
							if (merge) {
								ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
								
								if (m->control_pressed) { 
									delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
									listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
									m->mothurRemove(distFile);
									m->mothurRemove(overlapFile);
									outputTypes.clear();
									return 0; 
								}

								temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
								printData(temp);
								delete temp;
							}else{
								oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
								printData(&oldList);
							}
						}
						
						previousDist = seqs[i].dist;
						rndPreviousDist = rndDist;
						oldList = *list;
						Seq2Bin = cluster->getSeqtoBin();
						oldSeq2Bin = Seq2Bin;
					}
				}
			}
			//inHcluster.close();
			
			if(previousDist <= 0.0000){
				oldList.setLabel("unique");
				printData(&oldList);
			}
			else if(rndPreviousDist<cutoff){
				if (merge) {
					ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
					
					if (m->control_pressed) { 
							delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
							listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
							m->mothurRemove(distFile);
							m->mothurRemove(overlapFile);
							outputTypes.clear();
							return 0; 
					}
					
					temp->setLabel(toString(rndPreviousDist,  precisionLength-1));
					printData(temp);
					delete temp;
				}else{
					oldList.setLabel(toString(rndPreviousDist,  precisionLength-1));
					printData(&oldList);
				}
			}
			
			delete hcluster;
			m->mothurRemove(distFile);
			m->mothurRemove(overlapFile);
		}
		
		delete list; 
		delete rabund;
		listFile.close();
		sabundFile.close();
		rabundFile.close();
	
		if (m->control_pressed) { 
			delete nameMap; 
			listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
			outputTypes.clear();
			return 0; 
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();	outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
		m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine();	outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
		m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine();	outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
		m->mothurOutEndLine();
		
		if (saveCutoff != cutoff) { 
			if (hard)	{  saveCutoff = m->ceilDist(saveCutoff, precision);	}
			else		{	saveCutoff = m->roundDist(saveCutoff, precision);  }
			
			m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
		}
		
		//set list file as new current listfile
		string current = "";
		itTypes = outputTypes.find("list");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
		}
		
		//set rabund file as new current rabundfile
		itTypes = outputTypes.find("rabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
		}
		
		//set sabund file as new current sabundfile
		itTypes = outputTypes.find("sabund");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
		}
		
		
		m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
			
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "MGClusterCommand", "execute");
		exit(1);
	}
}
Пример #11
0
//**********************************************************************************************************************
int GetSeqsCommand::readList(){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
		ofstream out;
		m->openOutputFile(outputFileName, out);
		
		ifstream in;
		m->openInputFile(listfile, in);
		
		bool wroteSomething = false;
		int selectedCount = 0;
		
		while(!in.eof()){
			
			selectedCount = 0;
			
			if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName);  return 0; }

			//read in list vector
			ListVector list(in);
			
			//make a new list vector
			ListVector newList;
			newList.setLabel(list.getLabel());
			
			//for each bin
			for (int i = 0; i < list.getNumBins(); i++) {
			
				//parse out names that are in accnos file
				string binnames = list.get(i);
				
				string newNames = "";
				while (binnames.find_first_of(',') != -1) { 
					string name = binnames.substr(0,binnames.find_first_of(','));
					binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
					
					//if that name is in the .accnos file, add it
					if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++; }
				}
			
				//get last name
				if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++; }

				//if there are names in this bin add to new list
				if (newNames != "") { 
					newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
					newList.push_back(newNames);	
				}
			}
				
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.print(out);
			}
			
			m->gobble(in);
		}
		in.close();	
		out.close();
		
		if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
		outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName);
		
		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
		
		return 0;

	}
	catch(exception& e) {
		m->errorOut(e, "GetSeqsCommand", "readList");
		exit(1);
	}
}