//********************************************************************************************************************** int GetSeqsCommand::readList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); ifstream in; m->openInputFile(listfile, in); bool wroteSomething = false; int selectedCount = 0; if (m->debug) { set<string> temp; sanity["list"] = temp; } while(!in.eof()){ selectedCount = 0; //read in list vector ListVector list(in); //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); variables["[distance]"] = list.getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; m->openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); vector<string> binLabels = list.getLabels(); vector<string> newBinLabels; if (m->control_pressed) { in.close(); out.close(); return 0; } //for each bin for (int i = 0; i < list.getNumBins(); i++) { //parse out names that are in accnos file string binnames = list.get(i); vector<string> bnames; m->splitAtComma(binnames, bnames); string newNames = ""; for (int j = 0; j < bnames.size(); j++) { string name = bnames[j]; //if that name is in the .accnos file, add it if (names.count(name) != 0) { newNames += name + ","; selectedCount++; if (m->debug) { sanity["list"].insert(name); } } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.printHeaders(out); newList.print(out); } m->gobble(in); out.close(); } in.close(); if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); } m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readList"); exit(1); } }
//********************************************************************************************************************** int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){ try { //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); int numOtus = 0; //for each bin vector<string> binLabels = list->getLabels(); vector<string> newBinLabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { return 0; } //parse out names that are in accnos file string binnames = list->get(i); bool keepBin = false; string groupFileOutput = ""; //parse names string individual = ""; int length = binnames.length(); for(int j=0;j<length;j++){ if(binnames[j] == ','){ string group = groupMap->getGroup(individual); if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; } if (m->inUsersGroups(group, Groups)) { keepBin = true; } groupFileOutput += individual + "\t" + group + "\n"; individual = ""; } else{ individual += binnames[j]; } } string group = groupMap->getGroup(individual); if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; } if (m->inUsersGroups(group, Groups)) { keepBin = true; } groupFileOutput += individual + "\t" + group + "\n"; //if there are sequences from the groups we want in this bin add to new list, output to groupfile if (keepBin) { newList.push_back(binnames); newBinLabels.push_back(binLabels[i]); outGroup << groupFileOutput; numOtus++; } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.printHeaders(out); newList.print(out); } m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "processList"); exit(1); } }
//********************************************************************************************************************** int RemoveGroupsCommand::readList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); ifstream in; m->openInputFile(listfile, in); bool wroteSomething = false; int removedCount = 0; while(!in.eof()){ removedCount = 0; //read in list vector ListVector list(in); variables["[tag]"] = list.getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; m->openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); vector<string> binLabels = list.getLabels(); vector<string> newBinLabels; //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); //for each bin for (int i = 0; i < list.getNumBins(); i++) { if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list.get(i); string newNames = ""; while (binnames.find_first_of(',') != -1) { string name = binnames.substr(0,binnames.find_first_of(',')); binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); //if that name is in the .accnos file, add it if (names.count(name) == 0) { newNames += name + ","; } else { //if you are not in the accnos file check if you are a name that needs to be changed map<string, string>::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { newNames += it->second + ","; }else { removedCount++; } } } //get last name if (names.count(binnames) == 0) { newNames += binnames + ","; } else { //if you are not in the accnos file check if you are a name that needs to be changed map<string, string>::iterator it = uniqueToRedundant.find(binnames); if (it != uniqueToRedundant.end()) { newNames += it->second + ","; }else { removedCount++; } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.printHeaders(out); newList.print(out); } m->gobble(in); out.close(); } in.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); } m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readList"); exit(1); } }
//********************************************************************************************************************** int RemoveRareCommand::processList(){ try { //you must provide a label because the names in the listfile need to be consistent string thisLabel = ""; if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); } else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); } else { thisLabel = *labels.begin(); } InputData input(listfile, "list"); ListVector* list = input.getListVector(); //get first one or the one we want if (thisLabel != "") { //use smart distancing set<string> userLabels; userLabels.insert(thisLabel); set<string> processedLabels; string lastLabel = list->getLabel(); while((list != NULL) && (userLabels.size() != 0)) { if(userLabels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); delete list; list = input.getListVector(lastLabel); break; } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } if (userLabels.size() != 0) { m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + "."); m->mothurOutEndLine(); list = input.getListVector(lastLabel); } } string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); variables["[tag]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); variables["[extension]"] = m->getExtension(groupfile); string outputGroupFileName = getOutputFileName("group", variables); variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); variables["[extension]"] = m->getExtension(countfile); string outputCountFileName = getOutputFileName("count", variables); ofstream out, outGroup; m->openOutputFile(outputFileName, out); bool wroteSomething = false; //if groupfile is given then use it GroupMap* groupMap; CountTable ct; if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); SharedUtil util; vector<string> namesGroups = groupMap->getNamesOfGroups(); util.setGroups(Groups, namesGroups); m->openOutputFile(outputGroupFileName, outGroup); }else if (countfile != "") { ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { vector<string> namesGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, namesGroups); } } if (list != NULL) { vector<string> binLabels = list->getLabels(); vector<string> newLabels; //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close(); m->mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list->get(i); vector<string> names; string saveBinNames = binnames; m->splitAtComma(binnames, names); int binsize = names.size(); vector<string> newGroupFile; if (groupfile != "") { vector<string> newNames; saveBinNames = ""; for(int k = 0; k < names.size(); k++) { string group = groupMap->getGroup(names[k]); if (m->inUsersGroups(group, Groups)) { newGroupFile.push_back(names[k] + "\t" + group); newNames.push_back(names[k]); saveBinNames += names[k] + ","; } } names = newNames; binsize = names.size(); saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); }else if (countfile != "") { saveBinNames = ""; binsize = 0; for(int k = 0; k < names.size(); k++) { if (ct.hasGroupInfo()) { vector<string> thisSeqsGroups = ct.getGroups(names[k]); int thisSeqsCount = 0; for (int n = 0; n < thisSeqsGroups.size(); n++) { if (m->inUsersGroups(thisSeqsGroups[n], Groups)) { thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]); } } binsize += thisSeqsCount; //if you don't have any seqs from the groups the user wants, then remove you. if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); } else { saveBinNames += names[k] + ","; } }else { binsize += ct.getNumSeqs(names[k]); saveBinNames += names[k] + ","; } } saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); } if (binsize > nseqs) { //keep bin newList.push_back(saveBinNames); newLabels.push_back(binLabels[i]); if (groupfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } } else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { ct.remove(newGroupFile[k]); } } }else { if (countfile != "") { for(int k = 0; k < names.size(); k++) { ct.remove(names[k]); } } } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newLabels); newList.printHeaders(out); newList.print(out); } } out.close(); if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); } if (countfile != "") { if (ct.hasGroupInfo()) { vector<string> allGroups = ct.getNamesOfGroups(); for (int i = 0; i < allGroups.size(); i++) { if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); } } } ct.printTable(outputCountFileName); outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); } if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); } outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processList"); exit(1); } }