//********************************************************************************************************************** int GetSeqsCommand::readCount(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); variables["[extension]"] = m->getExtension(countfile); string outputFileName = getOutputFileName("count", variables); ofstream out; m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(countfile, in); bool wroteSomething = false; int selectedCount = 0; string headers = m->getline(in); m->gobble(in); out << headers << endl; string test = headers; vector<string> pieces = m->splitWhiteSpace(test); string name, rest; int thisTotal; rest = ""; while (!in.eof()) { if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } in >> name; m->gobble(in); in >> thisTotal; m->gobble(in); if (pieces.size() > 2) { rest = m->getline(in); m->gobble(in); } if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + rest + "\n"); } if (names.count(name) != 0) { out << name << '\t' << thisTotal << '\t' << rest << endl; wroteSomething = true; selectedCount+= thisTotal; } } in.close(); out.close(); //check for groups that have been eliminated CountTable ct; if (ct.testGroups(outputFileName)) { ct.readTable(outputFileName, true, false); ct.printTable(outputFileName); } if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); } outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readCount"); exit(1); } }
// Builds a new count table whose columns are the categories returned by
// designMap->getCategory(), merging the per-group counts of `countfile` into them.
//
// Steps, in order:
//   1. Reject count files for which CountTable::testGroups() fails.
//   2. If the count table and the design map report the same NUMBER of groups, require every
//      count-table group to be present in the design map (no check is made when the sizes
//      differ).
//   3. Drop groups not in `Groups` from the count table (Groups defaults to all groups).
//   4. For each sequence, bucket its group counts by designMap->get(group) and reduce each
//      bucket with mergeAbund(); sequences whose merged total is 0 are collected instead of
//      being added to the new table.
//   5. If any sequences were collected, write them to a temporary accnos file and run
//      remove.seqs on `fastafile`.
//   6. Print the new table to the "count" output file and register it.
//
// designMap: design map supplying group names, categories and the group -> category lookup.
// Returns 0 in all cases; failures are signalled by m->setControl_pressed(true) before the
// return. Exceptions are reported through m->errorOut and the process exits with status 1.
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) {
            m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n");
            m->setControl_pressed(true);
            return 0;
        }

        //read countTable
        countTable.readTable(countfile, true, false);

        //fill Groups - checks for "all" and for any typo groups
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }

        vector<string> dnamesGroups = designMap->getNamesGroups();

        //sanity check
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (int i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
        }
        if (error) {
            m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n");
            m->setControl_pressed(true);
            return 0;
        }

        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (int i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }
        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();

        //create new table
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap; // one empty bucket per treatment, copied for every sequence
        for (int i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        // Re-read the names from the new table so newCounts below is built in the table's own column order.
        treatments = newTable.getNamesOfGroups();

        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (int i = 0; i < namesOfSeqs.size(); i++) {

            if (m->getControl_pressed()) { break; }

            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;

            // Bucket each group's count under whatever designMap->get() returns for that group.
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }

            //create new counts for seq for new table
            vector<int> newCounts;
            int totalAbund = 0;
            for (int j = 0; j < treatments.size(); j++){
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund);  //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }

            //add seq to new table
            if (totalAbund == 0) { namesToRemove.insert(namesOfSeqs[i]); }
            else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }

        // The original re-tested `error` here; it can only be set before the sanity-check
        // return above, so that branch was unreachable and has been removed.

        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);

            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->getControl_pressed()) { out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;

            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);

            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();

            // NOTE(review): the returned file map is not used afterwards; kept because
            // getOutputFiles() is not visible here - confirm it has no side effects before removing.
            map<string, vector<string> > filenames = removeCommand->getOutputFiles();

            delete removeCommand;
            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();

            util.mothurRemove(accnosFile);
        }

        // Write next to the input count file unless an output directory was configured.
        string thisOutputDir = outputDir;
        if (outputDir == "") { thisOutputDir += util.hasPath(countfile); }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName);
        outputNames.push_back(outputFileName);

        newTable.printTable(outputFileName);

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
//********************************************************************************************************************** SharedCommand::SharedCommand(string option) { try { abort = false; calledHelp = false; pickedGroups=false; allLines = 1; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { vector<string> myArray = setParameters(); OptionParser parser(option); map<string, string> parameters = parser.getParameters(); ValidParameters validParameter; map<string, string>::iterator it; //check to make sure all parameters are valid for command for (it = parameters.begin(); it != parameters.end(); it++) { if (!validParameter.isValidParameter(it->first, myArray, it->second)) { abort = true; } } //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.valid(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } else { string path; it = parameters.find("list"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["list"] = inputDir + it->second; } } it = parameters.find("group"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["group"] = inputDir + it->second; } } it = parameters.find("count"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. 
if (path == "") { parameters["count"] = inputDir + it->second; } } it = parameters.find("biom"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["biom"] = inputDir + it->second; } } } vector<string> tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["map"] = tempOutNames; //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.valid(parameters, "outputdir"); if (outputDir == "not found"){ outputDir = ""; } //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } biomfile = validParameter.validFile(parameters, "biom"); if (biomfile == "not open") { biomfile = ""; abort = true; } else if (biomfile == "not found") { biomfile = ""; } else { current->setBiomFile(biomfile); } ordergroupfile = validParameter.validFile(parameters, "ordergroup"); if (ordergroupfile == "not open") { abort = true; } else if (ordergroupfile == "not found") { ordergroupfile = ""; } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); CountTable temp; if (!temp.testGroups(countfile)) { m->mothurOut("\n[WARNING]: Your count file does not have group info, all reads will be assigned to mothurGroup.\n"); temp.readTable(countfile, false, false); //dont read 
groups map<string, int> seqs = temp.getNameMap(); CountTable newCountTable; newCountTable.addGroup("mothurGroup"); for (map<string, int>::iterator it = seqs.begin(); it != seqs.end(); it++) { vector<int> counts; counts.push_back(it->second); newCountTable.push_back(it->first, counts); } string newCountfileName = util.getRootName(countfile) + "mothurGroup" + util.getExtension(countfile); newCountTable.printTable(newCountfileName); current->setCountFile(newCountfileName); countfile = newCountfileName; outputNames.push_back(newCountfileName); } } if ((biomfile == "") && (listfile == "") && (countfile == "")) { //you must provide at least one of the following //is there are current file available for either of these? //give priority to list, then biom, then count listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { biomfile = current->getBiomFile(); if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. 
You must provide a list or biom or count file before you can use the make.shared command.\n"); abort = true; } } } } else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom.\n"); abort = true; } if (listfile != "") { if ((groupfile == "") && (countfile == "")) { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You need to provide a groupfile or countfile if you are going to use the list format.\n"); abort = true; } } } } string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { pickedGroups=true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } if ((listfile == "") && (biomfile == "") && (countfile != "")) { //building a shared file from a count file, require label if (labels.size() == 0) { m->mothurOut("[ERROR]: You must provide a label when converting a count file to a shared file, please correct.\n"); abort = true; } } } } catch(exception& e) { m->errorOut(e, "SharedCommand", "SharedCommand"); exit(1); } }
//********************************************************************************************************************** int RemoveRareCommand::processList(){ try { //you must provide a label because the names in the listfile need to be consistent string thisLabel = ""; if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); } else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); } else { thisLabel = *labels.begin(); } InputData input(listfile, "list"); ListVector* list = input.getListVector(); //get first one or the one we want if (thisLabel != "") { //use smart distancing set<string> userLabels; userLabels.insert(thisLabel); set<string> processedLabels; string lastLabel = list->getLabel(); while((list != NULL) && (userLabels.size() != 0)) { if(userLabels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); delete list; list = input.getListVector(lastLabel); break; } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } if (userLabels.size() != 0) { m->mothurOut("Your file does not include the label " + thisLabel + ". 
I will use " + lastLabel + "."); m->mothurOutEndLine(); list = input.getListVector(lastLabel); } } string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); variables["[tag]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); variables["[extension]"] = m->getExtension(groupfile); string outputGroupFileName = getOutputFileName("group", variables); variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); variables["[extension]"] = m->getExtension(countfile); string outputCountFileName = getOutputFileName("count", variables); ofstream out, outGroup; m->openOutputFile(outputFileName, out); bool wroteSomething = false; //if groupfile is given then use it GroupMap* groupMap; CountTable ct; if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); SharedUtil util; vector<string> namesGroups = groupMap->getNamesOfGroups(); util.setGroups(Groups, namesGroups); m->openOutputFile(outputGroupFileName, outGroup); }else if (countfile != "") { ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { vector<string> namesGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, namesGroups); } } if (list != NULL) { vector<string> binLabels = list->getLabels(); vector<string> newLabels; //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close(); m->mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list->get(i); 
vector<string> names; string saveBinNames = binnames; m->splitAtComma(binnames, names); int binsize = names.size(); vector<string> newGroupFile; if (groupfile != "") { vector<string> newNames; saveBinNames = ""; for(int k = 0; k < names.size(); k++) { string group = groupMap->getGroup(names[k]); if (m->inUsersGroups(group, Groups)) { newGroupFile.push_back(names[k] + "\t" + group); newNames.push_back(names[k]); saveBinNames += names[k] + ","; } } names = newNames; binsize = names.size(); saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); }else if (countfile != "") { saveBinNames = ""; binsize = 0; for(int k = 0; k < names.size(); k++) { if (ct.hasGroupInfo()) { vector<string> thisSeqsGroups = ct.getGroups(names[k]); int thisSeqsCount = 0; for (int n = 0; n < thisSeqsGroups.size(); n++) { if (m->inUsersGroups(thisSeqsGroups[n], Groups)) { thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]); } } binsize += thisSeqsCount; //if you don't have any seqs from the groups the user wants, then remove you. 
if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); } else { saveBinNames += names[k] + ","; } }else { binsize += ct.getNumSeqs(names[k]); saveBinNames += names[k] + ","; } } saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); } if (binsize > nseqs) { //keep bin newList.push_back(saveBinNames); newLabels.push_back(binLabels[i]); if (groupfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } } else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { ct.remove(newGroupFile[k]); } } }else { if (countfile != "") { for(int k = 0; k < names.size(); k++) { ct.remove(names[k]); } } } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newLabels); newList.printHeaders(out); newList.print(out); } } out.close(); if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); } if (countfile != "") { if (ct.hasGroupInfo()) { vector<string> allGroups = ct.getNamesOfGroups(); for (int i = 0; i < allGroups.size(); i++) { if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); } } } ct.printTable(outputCountFileName); outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); } if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); } outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processList"); exit(1); } }