Exemplo n.º 1
0
/**
 * Collapses identical sequences of the input fasta file into unique representatives.
 *
 * Reads `fastafile` record by record. The first time an aligned sequence string is
 * seen, the record is written to the output fasta file and becomes the representative;
 * later records with the same aligned string are folded into that representative.
 * Depending on `format`, the redundancy is then reported either as a name file
 * ("name") or as a count table (any other value), optionally seeded from an input
 * `namefile` or `countfile`.
 *
 * Returns 0 when help/citation was requested, 2 when the command was aborted during
 * parameter parsing, and 0 otherwise (including when the user interrupts the run, in
 * which case the partial output files are removed first). Any std::exception is
 * reported through m->errorOut and terminates the process with exit(1).
 */
int DeconvoluteCommand::execute() {
	try {

		if (abort) { if (calledHelp) { return 0; }  return 2;	}

		//prepare filenames and open files
		//NOTE: the name and count output names are built before "[extension]" is added,
		//only the fasta output name sees the extension variable.
		map<string, string> variables;
		variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
		string outNameFile = getOutputFileName("name", variables);
		string outCountFile = getOutputFileName("count", variables);
		variables["[extension]"] = util.getExtension(fastafile);
		string outFastaFile = getOutputFileName("fasta", variables);

		map<string, string> nameMap;
		map<string, string>::iterator itNames;
		if (namefile != "")  {
			util.readNames(namefile, nameMap);
			if (namefile == outNameFile){
				//the default output name collides with the input name file, so build a "unique"-tagged name instead
				map<string, string> mvariables;
				mvariables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
				mvariables["[tag]"] = "unique";
				outNameFile = getOutputFileName("name", mvariables);
			}
		}
		CountTable ct;
		if (countfile != "")  {
			ct.readTable(countfile, true, false);
			if (countfile == outCountFile){
				//the default output name collides with the input count file, so build a "unique"-tagged name instead
				map<string, string> mvariables;
				mvariables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
				mvariables["[tag]"] = "unique";
				outCountFile = getOutputFileName("count", mvariables);
			}
		}

		if (m->getControl_pressed()) { return 0; }

		ifstream in;
		util.openInputFile(fastafile, in);

		ofstream outFasta;
		util.openOutputFile(outFastaFile, outFasta);

		map<string, string> sequenceStrings; //sequenceString -> list of names.  "atgc...." -> seq1,seq2,seq3. (format "name"), or sequenceString -> representative name (otherwise)
		map<string, string>::iterator itStrings;
		set<string> nameInFastaFile; //for sanity checking
		vector<string> nameFileOrder; //aligned strings in the order their representatives were written to the fasta file
		CountTable newCt; //count table built from scratch when no input count file is given
		int count = 0;
		while (!in.eof()) {

			if (m->getControl_pressed()) { in.close(); outFasta.close(); util.mothurRemove(outFastaFile); return 0; }

			Sequence seq(in);
			string seqName = seq.getName();

			if (seqName != "") {

				string aligned = seq.getAligned();

				//sanity checks - insert reports whether the name was already present
				if (!nameInFastaFile.insert(seqName).second) {
					m->mothurOut("[ERROR]: You already have a sequence named " + seqName + " in your fasta file, sequence names must be unique, please correct."); m->mothurOutEndLine();
				}

				itStrings = sequenceStrings.find(aligned);

				if (itStrings == sequenceStrings.end()) { //this is a new unique sequence
					//output to unique fasta file
					seq.printSequence(outFasta);

					if (namefile != "") {
						itNames = nameMap.find(seqName);

						if (itNames == nameMap.end()) { //namefile and fastafile do not match
							//NOTE(review): the record has already been written to the fasta output but is not tracked in sequenceStrings - confirm this is intended
							m->mothurOut("[ERROR]: " + seqName + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine();
						}else {
							if (format == "name") {
								sequenceStrings[aligned] = itNames->second;
								nameFileOrder.push_back(aligned);
							}else {
								newCt.push_back(seqName, util.getNumNames(itNames->second));
								sequenceStrings[aligned] = seqName;
								nameFileOrder.push_back(aligned);
							}
						}
					}else if (countfile != "") {
						if (format == "name") {
							//expand the abundance into numbered pseudo-names: name_0,name_1,...
							int numSeqs = ct.getNumSeqs(seqName);
							string expandedName = seqName+"_0";
							for (int i = 1; i < numSeqs; i++) {  expandedName += "," + seqName + "_" + toString(i);  }
							sequenceStrings[aligned] = expandedName;
							nameFileOrder.push_back(aligned);
						}else {
							ct.getNumSeqs(seqName); //checks to make sure seq is in table
							sequenceStrings[aligned] = seqName;
							nameFileOrder.push_back(aligned);
						}
					}else {
						if (format == "name") {
							sequenceStrings[aligned] = seqName;
							nameFileOrder.push_back(aligned);
						}else {
							newCt.push_back(seqName);
							sequenceStrings[aligned] = seqName;
							nameFileOrder.push_back(aligned);
						}
					}
				}else { //this is a dup - itStrings points at the representative's entry
					if (namefile != "") {
						itNames = nameMap.find(seqName);

						if (itNames == nameMap.end()) { //namefile and fastafile do not match
							m->mothurOut("[ERROR]: " + seqName + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine();
						}else {
							if (format == "name") { itStrings->second += "," + itNames->second;  }
							else {
								int currentReps = newCt.getNumSeqs(itStrings->second);
								newCt.setNumSeqs(itStrings->second, currentReps+(util.getNumNames(itNames->second)));
							}
						}
					}else if (countfile != "") {
						if (format == "name") {
							int numSeqs = ct.getNumSeqs(seqName);
							string expandedName = seqName+"_0";
							for (int i = 1; i < numSeqs; i++) {  expandedName += "," + seqName + "_" + toString(i);  }
							itStrings->second += "," + expandedName;
						}else {
							int num = ct.getNumSeqs(seqName); //checks to make sure seq is in table
							if (num != 0) { //its in the table
								ct.mergeCounts(itStrings->second, seqName); //merges counts and saves in uniques name
							}
						}
					}else {
						if (format == "name") {  itStrings->second += "," + seqName; }
						else {
							int currentReps = newCt.getNumSeqs(itStrings->second);
							newCt.setNumSeqs(itStrings->second, currentReps+1);
						}
					}
				}
				count++;
			}

			util.gobble(in);

			//progress report: sequences read <tab> uniques so far
			if(count % 1000 == 0)	{ m->mothurOutJustToScreen(toString(count) + "\t" + toString(sequenceStrings.size()) + "\n");	}
		}

		//final progress line, unless the loop just printed it
		if(count % 1000 != 0)	{ m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine();	}

		in.close();
		outFasta.close();

		if (m->getControl_pressed()) { util.mothurRemove(outFastaFile); return 0; }

		//print new names file (or count table, depending on format)
		ofstream outNames;
		if (format == "name") { util.openOutputFile(outNameFile, outNames); outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile);   }
		else { util.openOutputFile(outCountFile, outNames); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile);                }

		if ((countfile != "") && (format == "count")) { ct.printHeaders(outNames); }
		else if ((countfile == "") && (format == "count")) { newCt.printHeaders(outNames); }

		for (size_t i = 0; i < nameFileOrder.size(); i++) {
			if (m->getControl_pressed()) {
				outputTypes.clear(); util.mothurRemove(outFastaFile); outNames.close();
				for (size_t j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); }
				return 0;
			}

			itStrings = sequenceStrings.find(nameFileOrder[i]);

			if (itStrings != sequenceStrings.end()) {
				if (format == "name") {
					//get rep name - the representative is the first entry of the comma separated list
					string::size_type pos = (itStrings->second).find_first_of(',');

					if (pos == string::npos) { // only reps itself
						outNames << itStrings->second << '\t' << itStrings->second << endl;
					}else {
						outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl;
					}
				}else {
					if (countfile != "") {  ct.printSeq(outNames, itStrings->second);  }
					else if (format == "count")  {  newCt.printSeq(outNames, itStrings->second);  }
				}
			}else{ m->mothurOut("[ERROR]: mismatch in namefile print."); m->mothurOutEndLine(); m->setControl_pressed(true); }
		}
		outNames.close();

		if (m->getControl_pressed()) {
			outputTypes.clear(); util.mothurRemove(outFastaFile);
			for (size_t j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); }
			return 0;
		}

		m->mothurOut("\nOutput File Names: \n");
		outputNames.push_back(outFastaFile);   outputTypes["fasta"].push_back(outFastaFile);
		for (size_t i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	}
		m->mothurOutEndLine();

		//set fasta file as new current fastafile
		string currentName = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); }
		}

		//set name file as new current namefile, if one was produced
		itTypes = outputTypes.find("name");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); }
		}

		//set count file as new current countfile, if one was produced
		itTypes = outputTypes.find("count");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); }
		}

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "DeconvoluteCommand", "execute");
		exit(1);
	}
}
Exemplo n.º 2
0
/**
 * Merges the group columns of the count file into one column per design-file treatment.
 *
 * Reads `countfile`, optionally drops groups not listed in `Groups`, and for every
 * sequence combines the counts of all groups that map to the same treatment via
 * mergeAbund(). Sequences whose merged total is 0 are left out of the new table and
 * are additionally removed from `fastafile` by running remove.seqs. The merged table
 * is written to the "count" output file, which is registered in outputTypes/outputNames.
 *
 * @param designMap  design map supplying the group list, the treatment list and the
 *                   group -> treatment lookup.
 * @return always 0; failures are signalled through m->setControl_pressed(true).
 *         Any std::exception is reported through m->errorOut and ends the process
 *         with exit(1).
 */
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) { m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n"); m->setControl_pressed(true); return 0; }

        //read countTable
        countTable.readTable(countfile, true, false);

        //fill Groups - checks for "all" and for any typo groups
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }


        vector<string> dnamesGroups = designMap->getNamesGroups();

        //sanity check
        //NOTE(review): when the two group lists differ in size no check is made at all and
        //no error is raised - confirm whether a size mismatch is meant to be tolerated.
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }

        }
        if (error) { m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n"); m->setControl_pressed(true); return 0; }

        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }
        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();

        //create new table with one group per treatment
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap; //treatment -> empty count list, copied as the starting point for every sequence
        for (size_t i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        //re-read the treatments from the table so the count order below matches the table's group order
        treatments = newTable.getNamesOfGroups();

        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (size_t i = 0; i < namesOfSeqs.size(); i++) {

            if (m->getControl_pressed()) { break; }

            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;

            //bucket this sequence's per-group counts by the treatment each group belongs to
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }

            //create new counts for seq for new table
            vector<int> newCounts; int totalAbund = 0;
            for (size_t j = 0; j < treatments.size(); j++){
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund);  //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }

            //add seq to new table
            if(totalAbund == 0) {
                namesToRemove.insert(namesOfSeqs[i]);
            }else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }

        //the run was interrupted: stop here instead of running remove.seqs and
        //writing a table built from only part of the sequences
        if (m->getControl_pressed()) { return 0; }

        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);

            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->getControl_pressed()) {  out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;

            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);

            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();

            delete removeCommand;
            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();

            util.mothurRemove(accnosFile);
        }

        //write the merged table next to the input count file unless an output directory was given
        string thisOutputDir = outputDir;
        if (outputDir == "") {  thisOutputDir += util.hasPath(countfile);  }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);

        newTable.printTable(outputFileName);

        return 0;

    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
Exemplo n.º 3
0
//**********************************************************************************************************************
/**
 * Parses and validates the option string of the make.shared command.
 *
 * "help" and "citation" print the respective text and mark the command as aborted.
 * Otherwise the options are parsed, unknown parameters abort the command, relative
 * input file names are prefixed with inputdir, and the list / biom / ordergroup /
 * group / count files plus the groups and label parameters are read into the
 * corresponding members. Missing inputs fall back to the current files where
 * available. Problems are reported through m->mothurOut and by setting `abort`;
 * a std::exception is reported through m->errorOut and ends the process with exit(1).
 *
 * @param option  "help", "citation", or the raw parameter string of the command.
 */
SharedCommand::SharedCommand(string option)  {
	try {
		abort = false; calledHelp = false; pickedGroups=false;
		allLines = 1;

		//allow user to run help
		if(option == "help") { help(); abort = true; calledHelp = true; }
		else if(option == "citation") { citation(); abort = true; calledHelp = true;}

		else {

			vector<string> myArray = setParameters();

			OptionParser parser(option);
			map<string, string> parameters = parser.getParameters();

			ValidParameters validParameter;
			map<string, string>::iterator it;

			//check to make sure all parameters are valid for command
			for (it = parameters.begin(); it != parameters.end(); it++) {
				if (!validParameter.isValidParameter(it->first, myArray, it->second)) {  abort = true;  }
			}

			//if the user changes the input directory command factory will send this info to us in the output parameter
			string inputDir = validParameter.valid(parameters, "inputdir");
			if (inputDir == "not found"){	inputDir = "";		}
			else {
				//for each input file the user supplied without a path, add inputdir. else leave path alone.
				const char* fileParameters[] = { "list", "group", "count", "biom" };
				const int numFileParameters = sizeof(fileParameters) / sizeof(fileParameters[0]);
				for (int i = 0; i < numFileParameters; i++) {
					it = parameters.find(fileParameters[i]);
					if (it != parameters.end()) {
						if (util.hasPath(it->second) == "") {	it->second = inputDir + it->second;		}
					}
				}
			}

			vector<string> tempOutNames;
			outputTypes["shared"] = tempOutNames;
			outputTypes["group"] = tempOutNames;
			outputTypes["map"] = tempOutNames;

			//if the user changes the output directory command factory will send this info to us in the output parameter
			outputDir = validParameter.valid(parameters, "outputdir");		if (outputDir == "not found"){	outputDir = "";	}

			//check for required parameters
			listfile = validParameter.validFile(parameters, "list");
			if (listfile == "not open") { listfile = ""; abort = true; }
			else if (listfile == "not found") { listfile = "";  }
			else { current->setListFile(listfile); }

			biomfile = validParameter.validFile(parameters, "biom");
			if (biomfile == "not open") { biomfile = ""; abort = true; }
			else if (biomfile == "not found") { biomfile = "";  }
			else { current->setBiomFile(biomfile); }

			//reset the member on failure like the other file parameters, so the
			//"not open" sentinel is never left behind as a file name
			ordergroupfile = validParameter.validFile(parameters, "ordergroup");
			if (ordergroupfile == "not open") { ordergroupfile = ""; abort = true; }
			else if (ordergroupfile == "not found") { ordergroupfile = ""; }

			groupfile = validParameter.validFile(parameters, "group");
			if (groupfile == "not open") { groupfile = ""; abort = true; }
			else if (groupfile == "not found") { groupfile = ""; }
			else {  current->setGroupFile(groupfile); }

			countfile = validParameter.validFile(parameters, "count");
			if (countfile == "not open") { countfile = ""; abort = true; }
			else if (countfile == "not found") { countfile = ""; }
			else {
				current->setCountFile(countfile);
				CountTable temp;
				if (!temp.testGroups(countfile)) {
					m->mothurOut("\n[WARNING]: Your count file does not have group info, all reads will be assigned to mothurGroup.\n");

					temp.readTable(countfile, false, false); //dont read groups
					map<string, int> seqs = temp.getNameMap();

					//rebuild the table with a single group holding every sequence's total
					CountTable newCountTable;
					newCountTable.addGroup("mothurGroup");

					for (map<string, int>::iterator itSeq = seqs.begin(); itSeq != seqs.end(); itSeq++) {
						vector<int> counts; counts.push_back(itSeq->second);
						newCountTable.push_back(itSeq->first, counts);
					}

					string newCountfileName = util.getRootName(countfile) + "mothurGroup" + util.getExtension(countfile);
					newCountTable.printTable(newCountfileName);

					//continue with the rewritten table in place of the user's file
					current->setCountFile(newCountfileName);
					countfile = newCountfileName;
					outputNames.push_back(newCountfileName);
				}
			}

			if ((biomfile == "") && (listfile == "") && (countfile == "")) { //you must provide at least one of the following
				//is there are current file available for either of these?
				//give priority to list, then biom, then count
				listfile = current->getListFile();
				if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter.\n");  }
				else {
					biomfile = current->getBiomFile();
					if (biomfile != "") {  m->mothurOut("Using " + biomfile + " as input file for the biom parameter.\n"); }
					else {
						countfile = current->getCountFile();
						if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter.\n");  }
						else {
							m->mothurOut("[ERROR]: No valid current files. You must provide a list or biom or count file before you can use the make.shared command.\n");  abort = true;
						}

					}
				}
			}
			else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom.\n"); abort = true; }

			//a list file needs group membership from either a group file or a count file
			if (listfile != "") {
				if ((groupfile == "") && (countfile == "")) {
					groupfile = current->getGroupFile();
					if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n");  }
					else {
						countfile = current->getCountFile();
						if (countfile != "") {  m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); }
						else { m->mothurOut("[ERROR]: You need to provide a groupfile or countfile if you are going to use the list format.\n");  abort = true; }
					}
				}
			}


			string groups = validParameter.valid(parameters, "groups");
			if (groups == "not found") { groups = ""; }
			else {
				pickedGroups=true;
				util.splitAtDash(groups, Groups);
				if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } }
			}

			//check for optional parameter and set defaults
			// ...at some point should added some additional type checking...
			string label = validParameter.valid(parameters, "label");
			if (label == "not found") { label = ""; }
			else {
				if(label != "all") {  util.splitAtDash(label, labels);  allLines = 0;  }
				else { allLines = 1;  }
			}

			//NOTE(review): label=all leaves `labels` empty, so it is rejected here as well - confirm intended
			if ((listfile == "") && (biomfile == "") && (countfile != "")) { //building a shared file from a count file, require label
				if (labels.size() == 0) {
					m->mothurOut("[ERROR]: You must provide a label when converting a count file to a shared file, please correct.\n");  abort = true;
				}
			}
		}

	}
	catch(exception& e) {
		m->errorOut(e, "SharedCommand", "SharedCommand");
		exit(1);
	}
}