SequenceParser::SequenceParser(string groupFile, string fastaFile, vector<string> groupsSelected) {
	try {
		
		m = MothurOut::getInstance();
		int error;
		
		//read group file
		groupMap = new GroupMap(groupFile);
		error = groupMap->readMap();
		
		if (error == 1) { m->control_pressed = true; }
		
		//initialize maps
        vector<string> namesOfGroups = groupMap->getNamesOfGroups();
        set<string> selectedGroups;
        if (groupsSelected.size() != 0) {
            SharedUtil util;  util.setGroups(groupsSelected, namesOfGroups);
            namesOfGroups = groupsSelected;
        }
        
		for (int i = 0; i < namesOfGroups.size(); i++) {
			vector<Sequence> temp;
			seqs[namesOfGroups[i]] = temp;
            selectedGroups.insert(namesOfGroups[i]);
		}
		
		//read fasta file making sure each sequence is in the group file
		ifstream in;
		m->openInputFile(fastaFile, in);
		
		while (!in.eof()) {
			
			if (m->control_pressed) { break; }
			
			Sequence seq(in); m->gobble(in);
			
			if (seq.getName() != "") {
                
				string group = groupMap->getGroup(seq.getName());
                if (selectedGroups.count(group) != 0) { //this is a group we want
                    if (group == "not found") {  error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your groupfile, please correct."); m->mothurOutEndLine();  }
                    else {
                        seqs[group].push_back(seq);
                    }
                }
			}
		}
		in.close();
		
		if (error == 1) { m->control_pressed = true; }
		
	}
	catch(exception& e) {
		m->errorOut(e, "SequenceParser", "SequenceParser");
		exit(1);
	}
}
int MergeGroupsCommand::processGroupFile(DesignMap*& designMap){
	try {
		
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
        map<string, string> variables; 
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
        variables["[extension]"] = m->getExtension(groupfile);
		string outputFileName = getOutputFileName("group", variables);
		outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		ofstream out;
		m->openOutputFile(outputFileName, out);
		
		//read groupfile
		GroupMap groupMap(groupfile);
		groupMap.readMap();
		
		//fill Groups - checks for "all" and for any typo groups
		SharedUtil* util = new SharedUtil();
		vector<string> nameGroups = groupMap.getNamesOfGroups();
		util->setGroups(Groups, nameGroups);
		delete util;
		
		vector<string> namesOfSeqs = groupMap.getNamesSeqs();
		bool error = false;
		
		for (int i = 0; i < namesOfSeqs.size(); i++) {
			
			if (m->control_pressed) { break; }
			
			string thisGroup = groupMap.getGroup(namesOfSeqs[i]);
			
			//are you in a group the user wants
			if (m->inUsersGroups(thisGroup, Groups)) {
				string thisGrouping = designMap->get(thisGroup);
				
				if (thisGrouping == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct."); m->mothurOutEndLine();  error = true; }
				else {
					out << namesOfSeqs[i] << '\t' << thisGrouping << endl;
				}
			}
		}
		
		if (error) { m->control_pressed = true; }

		out.close();
		
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
		exit(1);
	}
}
Exemple #3
0
int GetOtusCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		groupMap = new GroupMap(groupfile);
		groupMap->readMap();
		
		//get groups you want to get
		if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
		
		//make sure groups are valid
		//takes care of user setting groupNames that are invalid or setting groups=all
		SharedUtil* util = new SharedUtil();
		vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
		util->setGroups(Groups, gNamesOfGroups);
		groupMap->setNamesOfGroups(gNamesOfGroups);
		delete util;
		
		if (m->control_pressed) { delete groupMap; return 0; }
		
		//read through the list file keeping any otus that contain any sequence from the groups selected
		readListGroup();
		
		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0; }
				
		if (outputNames.size() != 0) {
			m->mothurOutEndLine();
			m->mothurOut("Output File names: "); m->mothurOutEndLine();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
			m->mothurOutEndLine();
			
			//set list file as new current listfile
			string current = "";
			itTypes = outputTypes.find("group");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
			}
			
			itTypes = outputTypes.find("list");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
			}
		}
		
		return 0;		
	}
	
	catch(exception& e) {
		m->errorOut(e, "GetOtusCommand", "execute");
		exit(1);
	}
}
Exemple #4
0
//**********************************************************************************************************************
int SplitGroupCommand::runNameGroup(){
	try {
        SequenceParser* parser;
		if (namefile == "") {	parser = new SequenceParser(groupfile, fastafile);				}
		else				{	parser = new SequenceParser(groupfile, fastafile, namefile);	}
		
		if (m->control_pressed) { delete parser; return 0; }
        
		vector<string> namesGroups = parser->getNamesOfGroups();
		SharedUtil util;  util.setGroups(Groups, namesGroups);  
		
		string fastafileRoot = outputDir + m->getRootName(m->getSimpleName(fastafile));
		string namefileRoot = outputDir + m->getRootName(m->getSimpleName(namefile));
		
		m->mothurOutEndLine();
		for (int i = 0; i < Groups.size(); i++) {
			
			m->mothurOut("Processing group: " + Groups[i]); m->mothurOutEndLine();
			
            map<string, string> variables; 
            variables["[filename]"] = fastafileRoot;
            variables["[group]"] = Groups[i];

			string newFasta = getOutputFileName("fasta",variables);
            variables["[filename]"] = namefileRoot;
			string newName = getOutputFileName("name",variables);
			
			parser->getSeqs(Groups[i], newFasta, "/ab=", "/", false);
			outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
			if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} return 0; }
            
			if (namefile != "") { 
				parser->getNameMap(Groups[i], newName); 
				outputNames.push_back(newName); outputTypes["name"].push_back(newName);
			}
			
			if (m->control_pressed) { delete parser; for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} return 0; }
		}
		
		delete parser;
        
        return 0;

    }
	catch(exception& e) {
		m->errorOut(e, "SplitGroupCommand", "runNameGroup");
		exit(1);
	}
}
int MetaStatsCommand::execute(){
	try {
	
        if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
        
        //just used to convert files to test metastats online
        /****************************************************/
        bool convertInputToShared = false;
        convertSharedToInput = false;
        if (convertInputToShared) { convertToShared(sharedfile); return 0; }
        /****************************************************/
		
		designMap = new GroupMap(designfile);
		designMap->readDesignMap();
	
		input = new InputData(sharedfile, "sharedfile");
		lookup = input->getSharedRAbundVectors();
		string lastLabel = lookup[0]->getLabel();
		
		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
		set<string> processedLabels;
		set<string> userLabels = labels;
		
		//setup the pairwise comparions of sets for metastats
		//calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
		//make sure sets are all in designMap
		SharedUtil* util = new SharedUtil(); 
		vector<string> dGroups = designMap->getNamesOfGroups();
		util->setGroups(Sets, dGroups);  
		delete util;
		
		int numGroups = Sets.size();
		for (int a=0; a<numGroups; a++) { 
			for (int l = 0; l < a; l++) {	
				vector<string> groups; groups.push_back(Sets[a]); groups.push_back(Sets[l]);
				namesOfGroupCombos.push_back(groups);
			}
		}
	
		
		//only 1 combo
		if (numGroups == 2) { processors = 1; }
		else if (numGroups < 2)	{ m->mothurOut("Not enough sets, I need at least 2 valid sets. Unable to complete command."); m->mothurOutEndLine(); m->control_pressed = true; }

        if(processors != 1){
            int remainingPairs = namesOfGroupCombos.size();
            int startIndex = 0;
            for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
                int numPairs = remainingPairs; //case for last processor
                if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
                lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
                startIndex = startIndex + numPairs;
                remainingPairs = remainingPairs - numPairs;
            }
        }
		
		//as long as you are not at the end of the file or done wih the lines you want
		while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
			
			if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0; }
	
			if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){			

				m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
				process(lookup);
				
				processedLabels.insert(lookup[0]->getLabel());
				userLabels.erase(lookup[0]->getLabel());
			}
			
			if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
				string saveLabel = lookup[0]->getLabel();
			
				for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
				lookup = input->getSharedRAbundVectors(lastLabel);
				m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
				
				process(lookup);
				
				processedLabels.insert(lookup[0]->getLabel());
				userLabels.erase(lookup[0]->getLabel());
				
				//restore real lastlabel to save below
				lookup[0]->setLabel(saveLabel);
			}
			
			lastLabel = lookup[0]->getLabel();
			//prevent memory leak
			for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
			
			if (m->control_pressed) {  outputTypes.clear(); m->clearGroups(); delete input;  delete designMap;  for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0; }

			//get next line to process
			lookup = input->getSharedRAbundVectors();				
		}
		
		if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap;  for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); }  return 0; }

		//output error messages about any remaining user labels
		set<string>::iterator it;
		bool needToRun = false;
		for (it = userLabels.begin(); it != userLabels.end(); it++) {  
			m->mothurOut("Your file does not include the label " + *it); 
			if (processedLabels.count(lastLabel) != 1) {
				m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
				needToRun = true;
			}else {
				m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
			}
		}
	
		//run last label if you need to
		if (needToRun == true)  {
			for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
			lookup = input->getSharedRAbundVectors(lastLabel);
			
			m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
			
			process(lookup);
			
			for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
		}
	
		//reset groups parameter
		m->clearGroups();  
		delete input; 
		delete designMap;
		
		if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0;}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "MetaStatsCommand", "execute");
		exit(1);
	}
}
int GetSharedOTUCommand::runShared() {
	try {
        InputData input(sharedfile, "sharedfile");
		vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
		string lastLabel = lookup[0]->getLabel();
        
        if (Groups.size() == 0) {
            Groups = m->getGroups();
            
            //make string for outputfile name
            userGroups = "unique.";
            for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
            userGroups = userGroups.substr(0, userGroups.length()-1);
        }else {
            //sanity check for group names
            SharedUtil util;
            vector<string> allGroups = m->getAllGroups();
            util.setGroups(Groups, allGroups);
        }
        
        //put groups in map to find easier
        for(int i = 0; i < Groups.size(); i++) {
            groupFinder[Groups[i]] = Groups[i];
        }

		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
		set<string> processedLabels;
		set<string> userLabels = labels;
        
		//as long as you are not at the end of the file or done wih the lines you want
		while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
			if (m->control_pressed) {
                outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); }
                for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
			}
            
            
			if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
				m->mothurOut(lookup[0]->getLabel()); 
				process(lookup);
				
				processedLabels.insert(lookup[0]->getLabel());
				userLabels.erase(lookup[0]->getLabel());
			}
			
			if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = lookup[0]->getLabel();
                
                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                lookup = input.getSharedRAbundVectors(lastLabel);
                
                m->mothurOut(lookup[0]->getLabel()); 
                process(lookup);
                
                processedLabels.insert(lookup[0]->getLabel());
                userLabels.erase(lookup[0]->getLabel());
                
                //restore real lastlabel to save below
                lookup[0]->setLabel(saveLabel);
			}
			
			lastLabel = lookup[0]->getLabel();
            
			//get next line to process
			//prevent memory leak
			for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
			lookup = input.getSharedRAbundVectors();
		}
		
		if (m->control_pressed) {
            outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); }
            for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0;
        }
        
		//output error messages about any remaining user labels
		set<string>::iterator it;
		bool needToRun = false;
		for (it = userLabels.begin(); it != userLabels.end(); it++) {
			m->mothurOut("Your file does not include the label " + *it);
			if (processedLabels.count(lastLabel) != 1) {
				m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
				needToRun = true;
			}else {
				m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
			}
		}
		
		//run last label if you need to
		if (needToRun == true)  {
            for (int i = 0; i < lookup.size(); i++) {  if (lookup[i] != NULL) {	delete lookup[i];	} }
            lookup = input.getSharedRAbundVectors(lastLabel);
            
            m->mothurOut(lookup[0]->getLabel()); 
            process(lookup);
            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
		}
        
		//reset groups parameter
		m->clearGroups();  
		
		return 0;

    }
	catch(exception& e) {
		m->errorOut(e, "GetSharedOTUCommand", "runShared");
		exit(1);
	}
}
int GetSharedOTUCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
        if ( sharedfile != "") { runShared(); }
        else {
            m->setGroups(Groups);
            groupMap = new GroupMap(groupfile);
            int error = groupMap->readMap();
            if (error == 1) { delete groupMap; return 0; }
            
            if (m->control_pressed) { delete groupMap; return 0; }
            
            if (Groups.size() == 0) {
                Groups = groupMap->getNamesOfGroups();
                
                //make string for outputfile name
                userGroups = "unique.";
                for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
                userGroups = userGroups.substr(0, userGroups.length()-1);
            }else{
                //sanity check for group names
                SharedUtil util;
                vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
                util.setGroups(Groups, namesOfGroups);
                groupMap->setNamesOfGroups(namesOfGroups);
            }
        
            //put groups in map to find easier
            for(int i = 0; i < Groups.size(); i++) {
                groupFinder[Groups[i]] = Groups[i];
            }
            
            if (fastafile != "") {
                ifstream inFasta;
                m->openInputFile(fastafile, inFasta);
                
                while(!inFasta.eof()) {
                    if (m->control_pressed) { outputTypes.clear(); inFasta.close(); delete groupMap; return 0; }
                    
                    Sequence seq(inFasta); m->gobble(inFasta);
                    if (seq.getName() != "") {  seqs.push_back(seq);   }
                }
                inFasta.close();
            }
            
            ListVector* lastlist = NULL;
            string lastLabel = "";
            
            //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
            set<string> processedLabels;
            set<string> userLabels = labels;
            
            ifstream in;
            m->openInputFile(listfile, in);
            
            //as long as you are not at the end of the file or done wih the lines you want
            while((!in.eof()) && ((allLines == 1) || (userLabels.size() != 0))) {
                
                if (m->control_pressed) { 
                    if (lastlist != NULL) {		delete lastlist;	}
                    for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); }  outputTypes.clear();
                    delete groupMap; return 0;
                }
                
                list = new ListVector(in);
                
                if(allLines == 1 || labels.count(list->getLabel()) == 1){			
                    m->mothurOut(list->getLabel()); 
                    process(list);
                    
                    processedLabels.insert(list->getLabel());
                    userLabels.erase(list->getLabel());
                }
                
                if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                        string saveLabel = list->getLabel();
                        
                        m->mothurOut(lastlist->getLabel()); 
                        process(lastlist);
                        
                        processedLabels.insert(lastlist->getLabel());
                        userLabels.erase(lastlist->getLabel());
                        
                        //restore real lastlabel to save below
                        list->setLabel(saveLabel);
                }

                lastLabel = list->getLabel();
                
                if (lastlist != NULL) {		delete lastlist;	}
                lastlist = list;			
            }
            
            in.close();
            
            //output error messages about any remaining user labels
            set<string>::iterator it;
            bool needToRun = false;
            for (it = userLabels.begin(); it != userLabels.end(); it++) {  
                m->mothurOut("Your file does not include the label " + *it); 
                if (processedLabels.count(lastLabel) != 1) {
                    m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
                    needToRun = true;
                }else {
                    m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
                }
            }
            
            //run last label if you need to
            if (needToRun == true)  {
                    m->mothurOut(lastlist->getLabel()); 
                    process(lastlist);
                        
                    processedLabels.insert(lastlist->getLabel());
                    userLabels.erase(lastlist->getLabel());
            }
            

            //reset groups parameter
            m->clearGroups();  
            
            if (lastlist != NULL) {		delete lastlist;	}
            
            if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
		}
		//set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
		}
		
		if (output == "accnos") {
			itTypes = outputTypes.find("accnos");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
			}
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();


		return 0;
	}

	catch(exception& e) {
		m->errorOut(e, "GetSharedOTUCommand", "execute");
		exit(1);
	}
}
int CollectSharedCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//if the users entered no valid calculators don't execute command
		if (cDisplays.size() == 0) { return 0; }
		for(int i=0;i<cDisplays.size();i++){	cDisplays[i]->setAll(all);	}	
	
		input = new InputData(sharedfile, "sharedfile");
		order = input->getSharedOrderVector();
		string lastLabel = order->getLabel();
		
		//if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
		set<string> processedLabels;
		set<string> userLabels = labels;
			
		//set users groups
		SharedUtil* util = new SharedUtil();
		Groups = m->getGroups();
		vector<string> allGroups = m->getAllGroups();
		util->setGroups(Groups, allGroups, "collect");
		m->setGroups(Groups);
		m->setAllGroups(allGroups);
		delete util;

		while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
			if (m->control_pressed) { 
					for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	}  outputTypes.clear();
					for(int i=0;i<cDisplays.size();i++){	delete cDisplays[i];	}
					delete order; delete input;
					m->clearGroups();
					return 0;
			}

			if(allLines == 1 || labels.count(order->getLabel()) == 1){
			
				m->mothurOut(order->getLabel()); m->mothurOutEndLine();
				//create collectors curve
				cCurve = new Collect(order, cDisplays);
				cCurve->getSharedCurve(freq);
				delete cCurve;
			
				processedLabels.insert(order->getLabel());
				userLabels.erase(order->getLabel());
			}
			
			//you have a label the user want that is smaller than this label and the last label has not already been processed
			if ((m->anyLabelsToProcess(order->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
				string saveLabel = order->getLabel();
				
				delete order;
				order = input->getSharedOrderVector(lastLabel);
				
				m->mothurOut(order->getLabel()); m->mothurOutEndLine();
				//create collectors curve
				cCurve = new Collect(order, cDisplays);
				cCurve->getSharedCurve(freq);
				delete cCurve;
				
				processedLabels.insert(order->getLabel());
				userLabels.erase(order->getLabel());
				
				//restore real lastlabel to save below
				order->setLabel(saveLabel);
			}
			
			
			lastLabel = order->getLabel();			
			
			//get next line to process
			delete order;
			order = input->getSharedOrderVector();
		}
		
		if (m->control_pressed) { 
					for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	}   outputTypes.clear();
					for(int i=0;i<cDisplays.size();i++){	delete cDisplays[i];	}
					m->clearGroups();
					delete input;
					return 0;
		}
		
		//output error messages about any remaining user labels
		set<string>::iterator it;
		bool needToRun = false;
		for (it = userLabels.begin(); it != userLabels.end(); it++) {  
			m->mothurOut("Your file does not include the label " + *it); 
			if (processedLabels.count(lastLabel) != 1) {
				m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
				needToRun = true;
			}else {
				m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
			}
		}
		
		//run last label if you need to
		if (needToRun == true)  {
			if (order != NULL) {  delete order;  }
			order = input->getSharedOrderVector(lastLabel);
			
			m->mothurOut(order->getLabel()); m->mothurOutEndLine();
			cCurve = new Collect(order, cDisplays);
			cCurve->getSharedCurve(freq);
			delete cCurve;
			
			if (m->control_pressed) { 
				for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	}  outputTypes.clear();
				for(int i=0;i<cDisplays.size();i++){	delete cDisplays[i];	}
				delete order; 
				delete input;
				m->clearGroups();
				return 0;
			}

			delete order;
		}
		
		for(int i=0;i<cDisplays.size();i++){	delete cDisplays[i];	}	
		
		//reset groups parameter
		m->clearGroups(); 
		delete input;
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "CollectSharedCommand", "execute");
		exit(1);
	}
}
Exemple #9
0
//**********************************************************************************************************************
int SplitGroupCommand::runCount(){
	try {
        
        CountTable ct;
        ct.readTable(countfile, true, false);
        if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; }
        
        if (m->control_pressed) { return 0; }
        
        vector<string> namesGroups = ct.getNamesOfGroups();
        SharedUtil util;  util.setGroups(Groups, namesGroups); 
        
        //fill filehandles with neccessary ofstreams
        map<string, string> ffiles; //group -> filename
        map<string, string> cfiles; //group -> filename
        for (int i=0; i<Groups.size(); i++) {
            ofstream ftemp, ctemp;
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
            variables["[group]"] = Groups[i];
            string newFasta = getOutputFileName("fasta",variables);
            outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta);
            ffiles[Groups[i]] = newFasta;
            m->openOutputFile(newFasta, ftemp); ftemp.close();
            
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
            string newCount = getOutputFileName("count",variables);
            outputNames.push_back(newCount); outputTypes["count"].push_back(newCount);
            cfiles[Groups[i]] = newCount;
            m->openOutputFile(newCount, ctemp);
            ctemp << "Representative_Sequence\ttotal\t" << Groups[i] << endl; ctemp.close();
        }
        
        ifstream in; 
        m->openInputFile(fastafile, in);
        
        while (!in.eof()) {
            Sequence seq(in); m->gobble(in);
            
            if (m->control_pressed) { break; }
            if (seq.getName() != "") {
                vector<string> thisSeqsGroups = ct.getGroups(seq.getName());
                for (int i = 0; i < thisSeqsGroups.size(); i++) {
                    if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print
                        ofstream outf, outc;
                        m->openOutputFileAppend(ffiles[thisSeqsGroups[i]], outf);
                        seq.printSequence(outf); outf.close();
                        int numSeqs = ct.getGroupCount(seq.getName(), thisSeqsGroups[i]);
                        m->openOutputFileAppend(cfiles[thisSeqsGroups[i]], outc);
                        outc << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl; outc.close();
                    }
                }
            }
        }
        in.close();
        
        return 0;

    }
	catch(exception& e) {
		m->errorOut(e, "SplitGroupCommand", "runCount");
		exit(1);
	}
}
SequenceParser::SequenceParser(string groupFile, string fastaFile, string nameFile, vector<string> groupsSelected) {
	try {
		
		m = MothurOut::getInstance();
		int error;
		
		//read group file
		groupMap = new GroupMap(groupFile);
		error = groupMap->readMap();
		
		if (error == 1) { m->control_pressed = true; }
		
		//initialize maps
        vector<string> namesOfGroups = groupMap->getNamesOfGroups();
        set<string> selectedGroups;
        if (groupsSelected.size() != 0) {
            SharedUtil util;  util.setGroups(groupsSelected, namesOfGroups);
            namesOfGroups = groupsSelected;
        }
        
        for (int i = 0; i < namesOfGroups.size(); i++) {
            vector<Sequence> temp;
            map<string, string> tempMap;
            seqs[namesOfGroups[i]] = temp;
            nameMapPerGroup[namesOfGroups[i]] = tempMap;
            selectedGroups.insert(namesOfGroups[i]);
        }
        
        map<string, string>::iterator it;
        map<string, string> nameMap;
        m->readNames(nameFile, nameMap);
        
		//read fasta file making sure each sequence is in the group file
		ifstream in;
		m->openInputFile(fastaFile, in);
        
        while (!in.eof()) {
            
            if (m->control_pressed) { break; }
            
            Sequence seq(in); m->gobble(in);
            
            if (seq.getName() != "") {
                
                it = nameMap.find(seq.getName());
                
                if (it != nameMap.end()) { //in namefile
                    
                    vector<string> names;
                    string secondCol = it->second;
                    m->splitAtChar(secondCol, names, ',');
                    
                    map<string, string> splitMap; //group -> name1,name2,...
                    map<string, string>::iterator itSplit;
                    for (int i = 0; i < names.size(); i++) {
                        string group = groupMap->getGroup(names[i]);
                        
                        if (selectedGroups.count(group) != 0) { //this is a group we want
                            if (group == "not found") {  error = 1; m->mothurOut("[ERROR]: " + names[i] + " is in your names file and not in your group file, please correct.\n");  }
                            else {
                                allSeqsMap[names[i]] = names[0];
                                
                                itSplit = splitMap.find(group);
                                if (itSplit != splitMap.end()) { //adding seqs to this group
                                    (itSplit->second) += "," + names[i];
                                }else { //first sighting of this group
                                    splitMap[group] = names[i];
                                }
                            }
                        }
                    }
                    
                    //fill nameMapPerGroup - holds all lines in namefile separated by group
                    for (itSplit = splitMap.begin(); itSplit != splitMap.end(); itSplit++) {
                        //grab first name
                        string firstName = "";
                        for(int i = 0; i < (itSplit->second).length(); i++) {
                            if (((itSplit->second)[i]) != ',') {
                                firstName += ((itSplit->second)[i]);
                            }else { break; }
                        }
                        
                        //group1 -> seq1 -> seq1,seq2,seq3
                        nameMapPerGroup[itSplit->first][firstName] = itSplit->second;
                        seqs[itSplit->first].push_back(Sequence(firstName, seq.getAligned()));
                    }

                }else { error = 1; m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file and not in your name file, please correct.\n"); }
            }
        }
		in.close();
				 
		if (error == 1) { m->control_pressed = true; }
		
	}
	catch(exception& e) {
		m->errorOut(e, "SequenceParser", "SequenceParser");
		exit(1);
	}
}
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) { m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n"); m->control_pressed = true; return 0; }
        
        //read countTable
        countTable.readTable(countfile, true, false);
        
        //fill Groups - checks for "all" and for any typo groups
        SharedUtil util;
        vector<string> nameGroups = countTable.getNamesOfGroups();
        util.setGroups(Groups, nameGroups);
        
        vector<string> dnamesGroups = designMap->getNamesGroups();
        
        //sanity check
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (int i = 0; i < nameGroups.size(); i++) {
                if (m->control_pressed) { break; }
                if (!m->inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
            
        }
        if (error) { m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n"); m->control_pressed = true; return 0; }
        
        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (int i = 0; i < nameGroups.size(); i++) {
                if (!m->inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }
        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();
        
        //create new table
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap;
        for (int i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        treatments = newTable.getNamesOfGroups();
        
        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (int i = 0; i < namesOfSeqs.size(); i++) {
            
            if (m->control_pressed) { break; }
            
            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;
            
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }
        
            //create new counts for seq for new table
            vector<int> newCounts; int totalAbund = 0;
            for (int j = 0; j < treatments.size(); j++){
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund);  //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }
            
            //add seq to new table
            if(totalAbund == 0) {
                namesToRemove.insert(namesOfSeqs[i]);
            }else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }
        
        if (error) { m->control_pressed = true; return 0; }
        
        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            m->openOutputFile(accnosFile, out);
            
            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->control_pressed) {  out.close(); m->mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;
            
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            m->mothurCalling = true;
            
            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();
            
            map<string, vector<string> > filenames = removeCommand->getOutputFiles();
            
            delete removeCommand;
            m->mothurCalling = false;
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            
            m->mothurRemove(accnosFile);
        }
    
        string thisOutputDir = outputDir;
        if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
        variables["[extension]"] = m->getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
        
        newTable.printTable(outputFileName);
        
        return 0;
        
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
Exemple #12
0
int HomovaCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//read design file
		designMap = new GroupMap(designFileName);
		designMap->readDesignMap();
		
		if (outputDir == "") { outputDir = m->hasPath(phylipFileName); }
		
		//read in distance matrix and square it
		ReadPhylipVector readMatrix(phylipFileName);
		vector<string> sampleNames = readMatrix.read(distanceMatrix);
		
        if (Sets.size() != 0) { //user selected sets, so we want to remove the samples not in those sets
            SharedUtil util; 
            vector<string> dGroups = designMap->getNamesOfGroups();
            util.setGroups(Sets, dGroups);  
            
            for(int i=0;i<distanceMatrix.size();i++){
                
                if (m->control_pressed) { delete designMap; return 0; }
                
                string group = designMap->getGroup(sampleNames[i]);
                
                if (group == "not found") {
                    m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
                }else if (!m->inUsersGroups(group, Sets)){  //not in set we want remove it
                    //remove from all other rows
                    for(int j=0;j<distanceMatrix.size();j++){
                        distanceMatrix[j].erase(distanceMatrix[j].begin()+i);
                    }
                    distanceMatrix.erase(distanceMatrix.begin()+i);
                    sampleNames.erase(sampleNames.begin()+i);
                    i--;
                }
            }
        }

		for(int i=0;i<distanceMatrix.size();i++){
			for(int j=0;j<i;j++){
				distanceMatrix[i][j] *= distanceMatrix[i][j];	
			}
		}
		
		//link designMap to rows/columns in distance matrix
		map<string, vector<int> > origGroupSampleMap;
		for(int i=0;i<sampleNames.size();i++){
			string group = designMap->getGroup(sampleNames[i]);
			
			if (group == "not found") {
				m->mothurOut("[ERROR]: " + sampleNames[i] + " is not in your design file, please correct."); m->mothurOutEndLine(); m->control_pressed = true;
			}else { origGroupSampleMap[group].push_back(i); }
		}
		int numGroups = origGroupSampleMap.size();
		
		if (m->control_pressed) { delete designMap; return 0; }
		
		//create a new filename
		ofstream HOMOVAFile;
        map<string, string> variables; 
		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(phylipFileName));
		string HOMOVAFileName = getOutputFileName("homova", variables);				
		m->openOutputFile(HOMOVAFileName, HOMOVAFile);
		outputNames.push_back(HOMOVAFileName); outputTypes["homova"].push_back(HOMOVAFileName);
		
		HOMOVAFile << "HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values" << endl;
		m->mothurOut("HOMOVA\tBValue\tP-value\tSSwithin/(Ni-1)_values\n");
		
		double fullHOMOVAPValue = runHOMOVA(HOMOVAFile, origGroupSampleMap, experimentwiseAlpha);

		if(fullHOMOVAPValue <= experimentwiseAlpha && numGroups > 2){
			
			int numCombos = numGroups * (numGroups-1) / 2;
			double pairwiseAlpha = experimentwiseAlpha / (double) numCombos;
			
			map<string, vector<int> >::iterator itA;
			map<string, vector<int> >::iterator itB;
			
			for(itA=origGroupSampleMap.begin();itA!=origGroupSampleMap.end();itA++){
				itB = itA;itB++;
				for(;itB!=origGroupSampleMap.end();itB++){
					map<string, vector<int> > pairwiseGroupSampleMap;
					pairwiseGroupSampleMap[itA->first] = itA->second;
					pairwiseGroupSampleMap[itB->first] = itB->second;
					
					runHOMOVA(HOMOVAFile, pairwiseGroupSampleMap, pairwiseAlpha);
				}			
			}
			HOMOVAFile << endl;
			m->mothurOutEndLine();
			
			m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
			m->mothurOut("Pair-wise error rate (Bonferroni): " + toString(pairwiseAlpha) + '\n');
		}
		else{
			m->mothurOut("Experiment-wise error rate: " + toString(experimentwiseAlpha) + '\n');
		}
		
		m->mothurOut("If you have borderline P-values, you should try increasing the number of iterations\n");
		
		delete designMap;
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "HomovaCommand", "execute");
		exit(1);
	}
}
Exemple #13
0
//**********************************************************************************************************************
int RemoveRareCommand::processList(){
	try {
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
		string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
		string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" +  m->getExtension(groupfile);
		
		ofstream out, outGroup;
		m->openOutputFile(outputFileName, out);
		
		bool wroteSomething = false;
		
		//you must provide a label because the names in the listfile need to be consistent
		string thisLabel = "";
		if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
		else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
		else { thisLabel = *labels.begin(); }
		
		InputData input(listfile, "list");
		ListVector* list = input.getListVector();
		
		//get first one or the one we want
		if (thisLabel != "") { 	
			//use smart distancing
			set<string> userLabels; userLabels.insert(thisLabel);
			set<string> processedLabels;
			string lastLabel = list->getLabel();
			while((list != NULL) && (userLabels.size() != 0)) {
				if(userLabels.count(list->getLabel()) == 1){
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					break;
				}
				
				if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					delete list;
					list = input.getListVector(lastLabel);
					break;
				}
				lastLabel = list->getLabel();
				delete list;
				list = input.getListVector();
			}
			if (userLabels.size() != 0) { 
				m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
				list = input.getListVector(lastLabel); 
			}
		}
		
		//if groupfile is given then use it
		GroupMap* groupMap;
		if (groupfile != "") { 
			groupMap = new GroupMap(groupfile); groupMap->readMap(); 
			SharedUtil util;
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			util.setGroups(Groups, namesGroups);
			m->openOutputFile(outputGroupFileName, outGroup);
		}
		
		
		if (list != NULL) {	
			//make a new list vector
			ListVector newList;
			newList.setLabel(list->getLabel());
			
			//for each bin
			for (int i = 0; i < list->getNumBins(); i++) {
				if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list->get(i);
				vector<string> names;
				string saveBinNames = binnames;
				m->splitAtComma(binnames, names);
				
				vector<string> newGroupFile;
				if (groupfile != "") {
					vector<string> newNames;
					saveBinNames = "";
					for(int k = 0; k < names.size(); k++) {
						string group = groupMap->getGroup(names[k]);
						
						if (m->inUsersGroups(group, Groups)) {
							newGroupFile.push_back(names[k] + "\t" + group); 
								
							newNames.push_back(names[k]);	
							saveBinNames += names[k] + ",";
						}
					}
					names = newNames;
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
				}

				if (names.size() > nseqs) { //keep bin
					newList.push_back(saveBinNames);
					for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }
				}
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.print(out);
			}
		}	
		
		out.close();
		if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveRareCommand", "processList");
		exit(1);
	}
}
int RemoveGroupsCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//get groups you want to remove
		if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups);  }
		
		if (groupfile != "") {
			groupMap = new GroupMap(groupfile);
			groupMap->readMap();
			
			//make sure groups are valid
			//takes care of user setting groupNames that are invalid or setting groups=all
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			vector<string> checkedGroups;
            for (int i = 0; i < Groups.size(); i++) {
                if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
                else {  m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
            }
            
            if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
			else {
                Groups = checkedGroups;
                m->setGroups(Groups);
            }
            
			//fill names with names of sequences that are from the groups we want to remove 
			fillNames();
			
			delete groupMap;
		}else if (countfile != ""){
            if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
                m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
            }
            CountTable ct;
            ct.readTable(countfile, true, false);
            if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
            
            vector<string> gNamesOfGroups = ct.getNamesOfGroups();
            SharedUtil util;
            util.setGroups(Groups, gNamesOfGroups);
            vector<string> namesOfSeqs = ct.getNamesOfSeqs();
            sort(Groups.begin(), Groups.end());
            
            for (int i = 0; i < namesOfSeqs.size(); i++) {
                vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
                if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
                    names.insert(namesOfSeqs[i]);
                }
            }
        }

				
		if (m->control_pressed) { return 0; }
		
		//read through the correct file and output lines you want to keep
		if (namefile != "")			{		readName();		}
		if (fastafile != "")		{		readFasta();	}
		if (groupfile != "")		{		readGroup();	}
        if (countfile != "")		{		readCount();	}
		if (listfile != "")			{		readList();		}
		if (taxfile != "")			{		readTax();		}
		if (sharedfile != "")		{		readShared();	}
        if (designfile != "")		{		readDesign();	}
		
		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } return 0; }
				
		if (outputNames.size() != 0) {
			m->mothurOutEndLine();
			m->mothurOut("Output File names: "); m->mothurOutEndLine();
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
			m->mothurOutEndLine();
			
			//set fasta file as new current fastafile
			string current = "";
			itTypes = outputTypes.find("fasta");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
			}
			
			itTypes = outputTypes.find("name");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
			}
			
			itTypes = outputTypes.find("group");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
			}
			
			itTypes = outputTypes.find("list");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
			}
			
			itTypes = outputTypes.find("taxonomy");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
			}
			
			itTypes = outputTypes.find("shared");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
			}
            
            itTypes = outputTypes.find("design");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
			}
            
            itTypes = outputTypes.find("count");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
			}
		}
		
		return 0;		
	}
	
	catch(exception& e) {
		m->errorOut(e, "RemoveGroupsCommand", "execute");
		exit(1);
	}
}
Exemple #15
0
int CorrAxesCommand::getMetadata(){
	try {
		vector<string> groupNames;
		
		ifstream in;
		m->openInputFile(metadatafile, in);
		
		string headerLine = m->getline(in); m->gobble(in);
		istringstream iss (headerLine,istringstream::in);
		
		//read the first label, because it refers to the groups
		string columnLabel;
		iss >> columnLabel; m->gobble(iss); 
		
		//save names of columns you are reading
		while (!iss.eof()) {
			iss >> columnLabel; m->gobble(iss);
			metadataLabels.push_back(columnLabel);
		}
		int count = metadataLabels.size();
			
		//read rest of file
		while (!in.eof()) {
			
			if (m->control_pressed) { in.close(); return 0; }
			
			string group = "";
			in >> group; m->gobble(in);
			groupNames.push_back(group);
				
			SharedRAbundFloatVector* tempLookup = new SharedRAbundFloatVector();
			tempLookup->setGroup(group);
			tempLookup->setLabel("1");
			
			for (int i = 0; i < count; i++) {
				float temp = 0.0;
				in >> temp; 
				tempLookup->push_back(temp, group);
			}
			
			lookupFloat.push_back(tempLookup);
			
			m->gobble(in);
		}
		in.close();
		
		//remove any groups the user does not want, and set globaldata->groups with only valid groups
		SharedUtil* util;
		util = new SharedUtil();
		Groups = m->getGroups();
		util->setGroups(Groups, groupNames);
		m->setGroups(Groups);
		
		for (int i = 0; i < lookupFloat.size(); i++) {
			//if this sharedrabund is not from a group the user wants then delete it.
			if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) { 
				delete lookupFloat[i]; lookupFloat[i] = NULL;
				lookupFloat.erase(lookupFloat.begin()+i); 
				i--; 
			}
		}
		
		delete util;
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "CorrAxesCommand", "getMetadata");	
		exit(1);
	}
}
int ParsimonyCommand::execute() {
	try {
	
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		
		//randomtree will tell us if user had their own treefile or if they just want the random distribution
		//user has entered their own tree
		if (randomtree == "") { 
			
			m->setTreeFile(treefile);
			
            TreeReader* reader;
            if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
            else { reader = new TreeReader(treefile, countfile); }
            T = reader->getTrees();
            ct = T[0]->getCountTable();
            delete reader;
	
			if(outputDir == "") { outputDir += m->hasPath(treefile); }
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getSimpleName(treefile) +  ".";
            
			output = new ColumnFile(getOutputFileName("parsimony",variables), itersString);
			outputNames.push_back(getOutputFileName("parsimony",variables));
			outputTypes["parsimony"].push_back(getOutputFileName("parsimony",variables));
				
			sumFile = getOutputFileName("psummary",variables);
			m->openOutputFile(sumFile, outSum);
			outputNames.push_back(sumFile);
			outputTypes["psummary"].push_back(sumFile);
		}else { //user wants random distribution
			getUserInput();
				
			if(outputDir == "") { outputDir += m->hasPath(randomtree); }
			output = new ColumnFile(outputDir+ m->getSimpleName(randomtree), itersString);
			outputNames.push_back(outputDir+ m->getSimpleName(randomtree));
			outputTypes["parsimony"].push_back(outputDir+ m->getSimpleName(randomtree));
		}
			
		//set users groups to analyze
		SharedUtil util;
		vector<string> mGroups = m->getGroups();
		vector<string> tGroups = ct->getNamesOfGroups();
		util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony");	//sets the groups the user wants to analyze
		util.getCombos(groupComb, mGroups, numComp);
		m->setGroups(mGroups);
			
		if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); }
			
		Parsimony pars;
		counter = 0;
	
		Progress* reading;
		reading = new Progress("Comparing to random:", iters);
		
		if (m->control_pressed) { 
			delete reading; delete output;
			delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
			if (randomtree == "") {  outSum.close();  }
			for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } outputTypes.clear();
			m->clearGroups();
			return 0;
		}
			
		
		//get pscore for users tree
		userData.resize(numComp,0);  //data = AB, AC, BC, ABC.
		randomData.resize(numComp,0);  //data = AB, AC, BC, ABC.
		rscoreFreq.resize(numComp);  
		uscoreFreq.resize(numComp);  
		rCumul.resize(numComp);  
		uCumul.resize(numComp);  
		userTreeScores.resize(numComp);  
		UScoreSig.resize(numComp); 
				
		if (randomtree == "") {
			//get pscores for users trees
			for (int i = 0; i < T.size(); i++) {
				userData = pars.getValues(T[i], processors, outputDir);  //data = AB, AC, BC, ABC.
				
				if (m->control_pressed) { 
					delete reading; delete output;
					delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
					if (randomtree == "") {  outSum.close();  }
					for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } outputTypes.clear();
					m->clearGroups();
					return 0;
				}


				//output scores for each combination
				for(int k = 0; k < numComp; k++) {

					//update uscoreFreq
					map<int,double>::iterator it = uscoreFreq[k].find(userData[k]);
					if (it == uscoreFreq[k].end()) {//new score
						uscoreFreq[k][userData[k]] = 1;
					}else{ uscoreFreq[k][userData[k]]++; }
					
					//add users score to valid scores
					validScores[userData[k]] = userData[k];
					
					//save score for summary file
					userTreeScores[k].push_back(userData[k]);
				}
			}
			
			//get pscores for random trees
			for (int j = 0; j < iters; j++) {
								
				//create new tree with same num nodes and leaves as users
				randT = new Tree(ct);

				//create random relationships between nodes
				randT->assembleRandomTree();

				//get pscore of random tree
				randomData = pars.getValues(randT, processors, outputDir);
				
				if (m->control_pressed) { 
					delete reading;  delete output; delete randT;
					if (randomtree == "") {  outSum.close();  }
					for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } outputTypes.clear();
					delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
					m->clearGroups();
					return 0;
				}
					
				for(int r = 0; r < numComp; r++) {
					//add trees pscore to map of scores
					map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
					if (it != rscoreFreq[r].end()) {//already have that score
						rscoreFreq[r][randomData[r]]++;
					}else{//first time we have seen this score
						rscoreFreq[r][randomData[r]] = 1;
					}
			
					//add randoms score to validscores
					validScores[randomData[r]] = randomData[r];
				}
				
				//update progress bar
				reading->update(j);
				
				delete randT;
			}

		}else {
			//get pscores for random trees
			for (int j = 0; j < iters; j++) {
								
				//create new tree with same num nodes and leaves as users
				randT = new Tree(ct);
				//create random relationships between nodes

				randT->assembleRandomTree();
				
				if (m->control_pressed) { 
					delete reading; delete output; delete randT; delete ct; 
					for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
				}


				//get pscore of random tree
				randomData = pars.getValues(randT, processors, outputDir);
				
				if (m->control_pressed) { 
					delete reading; delete output; delete randT; delete ct; 
					for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
				}
			
				for(int r = 0; r < numComp; r++) {
					//add trees pscore to map of scores
					map<int,double>::iterator it = rscoreFreq[r].find(randomData[r]);
					if (it != rscoreFreq[r].end()) {//already have that score
						rscoreFreq[r][randomData[r]]++;
					}else{//first time we have seen this score
						rscoreFreq[r][randomData[r]] = 1;
					}
			
					//add randoms score to validscores
					validScores[randomData[r]] = randomData[r];
				}
				
				//update progress bar
				reading->update(j);
				
				delete randT;
			}
		}

		for(int a = 0; a < numComp; a++) {
			float rcumul = 0.0000;
			float ucumul = 0.0000;
			//this loop fills the cumulative maps and put 0.0000 in the score freq map to make it easier to print.
			for (map<int,double>::iterator it = validScores.begin(); it != validScores.end(); it++) { 
				if (randomtree == "") {
					map<int,double>::iterator it2 = uscoreFreq[a].find(it->first);
					//user data has that score 
					if (it2 != uscoreFreq[a].end()) { uscoreFreq[a][it->first] /= T.size(); ucumul+= it2->second;  }
					else { uscoreFreq[a][it->first] = 0.0000; } //no user trees with that score
					//make uCumul map
					uCumul[a][it->first] = ucumul;
				}
			
				//make rscoreFreq map and rCumul
				map<int,double>::iterator it2 = rscoreFreq[a].find(it->first);
				//get percentage of random trees with that info
				if (it2 != rscoreFreq[a].end()) {  rscoreFreq[a][it->first] /= iters; rcumul+= it2->second;  }
				else { rscoreFreq[a][it->first] = 0.0000; } //no random trees with that score
				rCumul[a][it->first] = rcumul;
			}
			
			//find the signifigance of each user trees score when compared to the random trees and save for printing the summary file
			for (int h = 0; h < userTreeScores[a].size(); h++) {
				UScoreSig[a].push_back(rCumul[a][userTreeScores[a][h]]);
			}
		}
		
		if (m->control_pressed) { 
				delete reading; delete output;
				delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
				if (randomtree == "") {  outSum.close();  }
				for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } outputTypes.clear();
				return 0;
		}
		
		//finish progress bar
		reading->finish();
		delete reading;
		
		printParsimonyFile();
		if (randomtree == "") { printUSummaryFile(); }
				
        delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
		
		if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "ParsimonyCommand", "execute");
		exit(1);
	}
}