Example #1
0
//**********************************************************************************************************************
/**
 * Writes a filtered copy of the count file that keeps only the selected sequences.
 *
 * The first line of `countfile` is copied to the output unchanged. Every
 * following row is parsed as `name total [rest of line]`; the remainder of
 * the line is only read when the header has more than two columns. A row is
 * written to the output when `name` is present in the member container
 * `names`, and its total is added to the reported number of selected sequences.
 *
 * After the rows are written, the output is passed through CountTable
 * (testGroups / readTable / printTable) when testGroups() reports group data,
 * which per the original comment is meant to drop groups that were eliminated.
 *
 * A row whose name or integer total cannot be read is treated as a fatal
 * format error: the partial output is removed and the command is flagged as
 * aborted, instead of looping forever on a stream stuck in the fail state.
 *
 * @return 0 on every non-exception path (including interruption and format errors).
 *         Exceptions are reported through errorOut() and terminate the process.
 */
int GetSeqsCommand::readCount(){
	try {
		// build the output file name next to the input unless an output directory was given
		string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
		map<string, string> variables;
		variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
		variables["[extension]"] = m->getExtension(countfile);
		string outputFileName = getOutputFileName("count", variables);

		ofstream out;
		m->openOutputFile(outputFileName, out);

		ifstream in;
		m->openInputFile(countfile, in);

		bool wroteSomething = false;
		int selectedCount = 0;

		// header is copied verbatim; its column count tells us whether rows carry extra columns
		string headers = m->getline(in); m->gobble(in);
		out << headers << endl;
		string test = headers; vector<string> pieces = m->splitWhiteSpace(test);

		string name, rest; int thisTotal = 0; rest = "";
		while (!in.eof()) {

			if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }

			in >> name; m->gobble(in);
			in >> thisTotal;

			// Check the extraction itself, before gobble() touches the stream again.
			// Once failbit is set no further read succeeds and eofbit is never reached,
			// so without this test a malformed row would spin in this loop forever.
			if (in.fail()) {
				m->mothurOut("[ERROR]: Unable to read a sequence name followed by an integer total from " + countfile + ", please correct.\n");
				m->control_pressed = true;
				in.close();  out.close();  m->mothurRemove(outputFileName);
				return 0;
			}
			m->gobble(in);

			if (pieces.size() > 2) {  rest = m->getline(in); m->gobble(in);  }
			if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); }

			if (names.count(name) != 0) {
				// '\n' instead of endl: same bytes in the file, no flush per row
				out << name << '\t' << thisTotal << '\t' << rest << '\n';
				wroteSomething = true;
				selectedCount += thisTotal;
			}
		}
		in.close();
		out.close();

		//check for groups that have been eliminated
		CountTable ct;
		if (ct.testGroups(outputFileName)) {
			ct.readTable(outputFileName, true, false);
			ct.printTable(outputFileName);
		}

		if (wroteSomething == false) {  m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine();  }
		outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);

		m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "GetSeqsCommand", "readCount");
		exit(1);
	}
}
Example #2
0
/**
 * Collapses the groups of the count file into the treatments named by the design map
 * and prints the merged table as a new count file.
 *
 * Steps, in order:
 *  1. Read `countfile` (it must contain group information).
 *  2. When the count table and design map have the same number of groups, verify that
 *     every count-table group appears in the design map.
 *  3. Remove groups the user did not select (member `Groups`).
 *  4. For each sequence, bucket its per-group counts by treatment and reduce each bucket
 *     with mergeAbund(); sequences whose merged total is 0 are collected for removal.
 *  5. If any sequences were zeroed out, run remove.seqs on `fastafile` with a temporary
 *     accnos file listing them.
 *  6. Print the merged table and register it as a "count" output.
 *
 * @param designMap  design map providing the group -> treatment assignment; only read here.
 * @return 0 on every non-exception path. Errors are signalled by setting the
 *         control-pressed flag; exceptions are reported through errorOut() and exit.
 */
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) { m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n"); m->setControl_pressed(true); return 0; }
        
        //read countTable
        countTable.readTable(countfile, true, false);
        
        //fill Groups - checks for "all" and for any typo groups
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }
        
        
        vector<string> dnamesGroups = designMap->getNamesGroups();
        
        //sanity check
        // NOTE(review): the membership test only runs when both files have the same number
        // of groups; a size mismatch is not reported here - confirm that is intended.
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (int i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
            
        }
        if (error) { m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n"); m->setControl_pressed(true); return 0; }
        
        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (int i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }
        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();
        
        //create new table with one group per treatment; clearedMap is the empty
        //treatment -> counts template copied for every sequence below
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap;
        for (int i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        //re-read the treatment names from the new table so counts are pushed in its column order
        treatments = newTable.getNamesOfGroups();
        
        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (int i = 0; i < namesOfSeqs.size(); i++) {
            
            if (m->getControl_pressed()) { break; }
            
            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;
            
            //bucket this sequence's per-group counts under the treatment of each group
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }
        
            //create new counts for seq for new table
            vector<int> newCounts; int totalAbund = 0;
            for (int j = 0; j < treatments.size(); j++){
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund);  //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }
            
            //add seq to new table
            if(totalAbund == 0) {
                namesToRemove.insert(namesOfSeqs[i]);
            }else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }
        
        // The loops above stop early when the user interrupts the command. Stop here as well
        // so that remove.seqs is not launched and a partially merged table is not printed.
        // (The original tested `error` at this point, which can never be true this far down.)
        if (m->getControl_pressed()) { return 0; }
        
        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);
            
            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->getControl_pressed()) {  out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;
            
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);
            
            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();
            
            delete removeCommand;
            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            
            util.mothurRemove(accnosFile);
        }
    
        //name the merged count file after the input, in outputDir when one was given
        string thisOutputDir = outputDir;
        if (outputDir == "") {  thisOutputDir += util.hasPath(countfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
        
        newTable.printTable(outputFileName);
        
        return 0;
        
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
Example #3
0
//**********************************************************************************************************************
/**
 * Parses the option string of a make.shared command and validates its inputs.
 *
 * "help" and "citation" are answered immediately and mark the command as aborted.
 * Otherwise the parameters are validated, the list / group / count / biom file names
 * are prefixed with inputdir when they carry no path, each input file is opened and
 * registered as the current file of its type, and missing inputs are filled in from
 * the current files. `abort` is set whenever a required input is missing or invalid.
 *
 * @param option  raw option string handed to the command.
 */
SharedCommand::SharedCommand(string option)  {
	try {
		abort = false; calledHelp = false; pickedGroups = false;
		allLines = 1;

		//allow user to run help or citation; neither goes on to parse parameters
		if (option == "help") { help(); abort = true; calledHelp = true; return; }
		if (option == "citation") { citation(); abort = true; calledHelp = true; return; }

		vector<string> validNames = setParameters();

		OptionParser parser(option);
		map<string, string> parameters = parser.getParameters();

		ValidParameters validParameter;

		//every parameter the user supplied must be one this command knows
		for (map<string, string>::iterator param = parameters.begin(); param != parameters.end(); param++) {
			if (!validParameter.isValidParameter(param->first, validNames, param->second)) {  abort = true;  }
		}

		//if the user changes the input directory command factory will send this info to us in the output parameter
		string inputDir = validParameter.valid(parameters, "inputdir");
		if (inputDir != "not found") {
			//file parameters given without a path are looked up in inputdir; others are left alone
			const char* fileParameters[] = { "list", "group", "count", "biom" };
			for (int i = 0; i < 4; i++) {
				map<string, string>::iterator given = parameters.find(fileParameters[i]);
				if (given == parameters.end()) { continue; }
				if (util.hasPath(given->second) == "") { given->second = inputDir + given->second; }
			}
		}
		else { inputDir = ""; }

		//start every output type with an empty list of file names
		const vector<string> noOutputs;
		outputTypes["shared"] = noOutputs;
		outputTypes["group"] = noOutputs;
		outputTypes["map"] = noOutputs;

		//if the user changes the output directory command factory will send this info to us in the output parameter
		outputDir = validParameter.valid(parameters, "outputdir");
		if (outputDir == "not found") { outputDir = ""; }

		//list file: unreadable aborts, absent is allowed, readable becomes the current list file
		listfile = validParameter.validFile(parameters, "list");
		const bool listUnreadable = (listfile == "not open");
		if (listUnreadable || (listfile == "not found")) {
			listfile = "";
			if (listUnreadable) { abort = true; }
		}
		else { current->setListFile(listfile); }

		//biom file: same handling as the list file
		biomfile = validParameter.validFile(parameters, "biom");
		const bool biomUnreadable = (biomfile == "not open");
		if (biomUnreadable || (biomfile == "not found")) {
			biomfile = "";
			if (biomUnreadable) { abort = true; }
		}
		else { current->setBiomFile(biomfile); }

		//ordergroup file: an unreadable file aborts but its name is left as returned
		ordergroupfile = validParameter.validFile(parameters, "ordergroup");
		if (ordergroupfile == "not found") { ordergroupfile = ""; }
		else if (ordergroupfile == "not open") { abort = true; }

		//group file
		groupfile = validParameter.validFile(parameters, "group");
		const bool groupUnreadable = (groupfile == "not open");
		if (groupUnreadable || (groupfile == "not found")) {
			groupfile = "";
			if (groupUnreadable) { abort = true; }
		}
		else { current->setGroupFile(groupfile); }

		//count file
		countfile = validParameter.validFile(parameters, "count");
		const bool countUnreadable = (countfile == "not open");
		if (countUnreadable || (countfile == "not found")) {
			countfile = "";
			if (countUnreadable) { abort = true; }
		}
		else {
			current->setCountFile(countfile);

			CountTable temp;
			if (!temp.testGroups(countfile)) {
				//no group columns: write a replacement count file with a single group holding every read
				m->mothurOut("\n[WARNING]: Your count file does not have group info, all reads will be assigned to mothurGroup.\n");

				temp.readTable(countfile, false, false); //dont read groups
				map<string, int> seqs = temp.getNameMap();

				CountTable newCountTable;
				newCountTable.addGroup("mothurGroup");

				for (map<string, int>::iterator seq = seqs.begin(); seq != seqs.end(); seq++) {
					vector<int> counts(1, seq->second);
					newCountTable.push_back(seq->first, counts);
				}

				string newCountfileName = util.getRootName(countfile) + "mothurGroup" + util.getExtension(countfile);
				newCountTable.printTable(newCountfileName);

				current->setCountFile(newCountfileName);
				countfile = newCountfileName;
				outputNames.push_back(newCountfileName);
			}
		}

		//you must provide at least one of list, biom or count
		const bool noPrimaryInput = (biomfile == "") && (listfile == "") && (countfile == "");
		if (noPrimaryInput) {
			//fall back to the current files, giving priority to list, then biom, then count
			listfile = current->getListFile();
			if (listfile != "") {
				m->mothurOut("Using " + listfile + " as input file for the list parameter.\n");
			}
			else if ((biomfile = current->getBiomFile()) != "") {
				m->mothurOut("Using " + biomfile + " as input file for the biom parameter.\n");
			}
			else if ((countfile = current->getCountFile()) != "") {
				m->mothurOut("Using " + countfile + " as input file for the count parameter.\n");
			}
			else {
				m->mothurOut("[ERROR]: No valid current files. You must provide a list or biom or count file before you can use the make.shared command.\n");
				abort = true;
			}
		}
		else if ((biomfile != "") && (listfile != "")) {
			m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom.\n");
			abort = true;
		}

		//the list format needs group assignments from either a group file or a count file
		if ((listfile != "") && (groupfile == "") && (countfile == "")) {
			groupfile = current->getGroupFile();
			if (groupfile != "") {
				m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n");
			}
			else if ((countfile = current->getCountFile()) != "") {
				m->mothurOut("Using " + countfile + " as input file for the count parameter.\n");
			}
			else {
				m->mothurOut("[ERROR]: You need to provide a groupfile or countfile if you are going to use the list format.\n");
				abort = true;
			}
		}

		//groups: a dash separated selection, where a leading "all" means no restriction
		string groups = validParameter.valid(parameters, "groups");
		if (groups != "not found") {
			pickedGroups = true;
			util.splitAtDash(groups, Groups);
			if ((Groups.size() != 0) && (Groups[0] == "all")) { Groups.clear(); }
		}
		else { groups = ""; }

		//check for optional parameter and set defaults
		// ...at some point should added some additional type checking...
		string label = validParameter.valid(parameters, "label");
		if (label == "not found") { label = ""; }
		else if (label == "all") { allLines = 1; }
		else { util.splitAtDash(label, labels);  allLines = 0; }

		//building a shared file from a count file alone requires a label
		const bool countOnly = (listfile == "") && (biomfile == "") && (countfile != "");
		if (countOnly && (labels.size() == 0)) {
			m->mothurOut("[ERROR]: You must provide a label when converting a count file to a shared file, please correct.\n");
			abort = true;
		}

	}
	catch(exception& e) {
		m->errorOut(e, "SharedCommand", "SharedCommand");
		exit(1);
	}
}
Example #4
0
//**********************************************************************************************************************
int RemoveRareCommand::processList(){
	try {
				
		//you must provide a label because the names in the listfile need to be consistent
		string thisLabel = "";
		if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
		else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
		else { thisLabel = *labels.begin(); }
		
		InputData input(listfile, "list");
		ListVector* list = input.getListVector();
		
		//get first one or the one we want
		if (thisLabel != "") { 	
			//use smart distancing
			set<string> userLabels; userLabels.insert(thisLabel);
			set<string> processedLabels;
			string lastLabel = list->getLabel();
			while((list != NULL) && (userLabels.size() != 0)) {
				if(userLabels.count(list->getLabel()) == 1){
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					break;
				}
				
				if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
					processedLabels.insert(list->getLabel());
					userLabels.erase(list->getLabel());
					delete list;
					list = input.getListVector(lastLabel);
					break;
				}
				lastLabel = list->getLabel();
				delete list;
				list = input.getListVector();
			}
			if (userLabels.size() != 0) { 
				m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + ".");  m->mothurOutEndLine();
				list = input.getListVector(lastLabel); 
			}
		}
        
        string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
        variables["[extension]"] = m->getExtension(listfile);
        variables["[tag]"] = list->getLabel();
		string outputFileName = getOutputFileName("list", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
        variables["[extension]"] = m->getExtension(groupfile);
		string outputGroupFileName = getOutputFileName("group", variables);
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
        variables["[extension]"] = m->getExtension(countfile);
        string outputCountFileName = getOutputFileName("count", variables);
        
		ofstream out, outGroup;
		m->openOutputFile(outputFileName, out);
		
		bool wroteSomething = false;

		
		//if groupfile is given then use it
		GroupMap* groupMap;
        CountTable ct;
		if (groupfile != "") { 
			groupMap = new GroupMap(groupfile); groupMap->readMap(); 
			SharedUtil util;
			vector<string> namesGroups = groupMap->getNamesOfGroups();
			util.setGroups(Groups, namesGroups);
			m->openOutputFile(outputGroupFileName, outGroup);
		}else if (countfile != "") {
            ct.readTable(countfile, true, false);
            if (ct.hasGroupInfo()) {
                vector<string> namesGroups = ct.getNamesOfGroups();
                SharedUtil util;
                util.setGroups(Groups, namesGroups);
            }
        }
		
		
		if (list != NULL) {
            
            vector<string> binLabels = list->getLabels();
            vector<string> newLabels;
            
			//make a new list vector
			ListVector newList;
			newList.setLabel(list->getLabel());
			
			//for each bin
			for (int i = 0; i < list->getNumBins(); i++) {
				if (m->control_pressed) {  if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close();  m->mothurRemove(outputFileName);  return 0; }
				
				//parse out names that are in accnos file
				string binnames = list->get(i);
				vector<string> names;
				string saveBinNames = binnames;
				m->splitAtComma(binnames, names);
                int binsize = names.size();
				
				vector<string> newGroupFile;
				if (groupfile != "") {
					vector<string> newNames;
					saveBinNames = "";
					for(int k = 0; k < names.size(); k++) {
						string group = groupMap->getGroup(names[k]);
						
						if (m->inUsersGroups(group, Groups)) {
							newGroupFile.push_back(names[k] + "\t" + group); 
								
							newNames.push_back(names[k]);	
							saveBinNames += names[k] + ",";
						}
					}
					names = newNames; binsize = names.size();
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
				}else if (countfile != "") {
					saveBinNames = "";
                    binsize = 0;
					for(int k = 0; k < names.size(); k++) {
                        if (ct.hasGroupInfo()) {
                            vector<string> thisSeqsGroups = ct.getGroups(names[k]);
                            
                            int thisSeqsCount = 0;
                            for (int n = 0; n < thisSeqsGroups.size(); n++) {
                                if (m->inUsersGroups(thisSeqsGroups[n], Groups)) {
                                    thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]);
                                }
                            }
                            binsize += thisSeqsCount;
                            //if you don't have any seqs from the groups the user wants, then remove you.
                            if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); }
                            else { saveBinNames += names[k] + ","; }
                        }else {
                            binsize += ct.getNumSeqs(names[k]); 
                            saveBinNames += names[k] + ",";
                        }
					}
					saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
                }

				if (binsize > nseqs) { //keep bin
					newList.push_back(saveBinNames);
                    newLabels.push_back(binLabels[i]);
					if (groupfile != "") {  for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }  }
                    else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) {  ct.remove(newGroupFile[k]); } }  
				}else {  if (countfile != "") {  for(int k = 0; k < names.size(); k++) {  ct.remove(names[k]); } }  }
			}
			
			//print new listvector
			if (newList.getNumBins() != 0) {
				wroteSomething = true;
				newList.setLabels(newLabels);
                newList.printHeaders(out);
                newList.print(out);
			}
		}	
		
		out.close();
		if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }
        if (countfile != "") { 
            if (ct.hasGroupInfo()) {
                vector<string> allGroups = ct.getNamesOfGroups();
                for (int i = 0; i < allGroups.size(); i++) {
                    if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); }
                }

            }
            ct.printTable(outputCountFileName);
            outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); 
        }
		
		if (wroteSomething == false) {  m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine();  }
		outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "RemoveRareCommand", "processList");
		exit(1);
	}
}