int SensSpecCommand::processColumn(){ try{ string origCutoff = ""; bool getCutoff = 0; if(cutoff == -1.00) { getCutoff = 1; } else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); } set<string> seqPairSet; int numSeqs = 0; InputData input(listFile, "list"); ListVector* list = input.getListVector(); string lastLabel = list->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete list; return 0; } if(allLines == 1 || labels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //process numSeqs = fillSeqPairSet(seqPairSet, list); process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; list = input.getListVector(lastLabel); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //process numSeqs = fillSeqPairSet(seqPairSet, list); process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); //restore real lastlabel to save below list->setLabel(saveLabel); } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". 
Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { if (list != NULL) { delete list; } list = input.getListVector(lastLabel); //process numSeqs = fillSeqPairSet(seqPairSet, list); delete list; process(seqPairSet, list->getLabel(), getCutoff, origCutoff, numSeqs); } return 0; } catch(exception& e) { m->errorOut(e, "SensSpecCommand", "processColumn"); exit(1); } }
// SensSpecCommand::preProcessList
//
// Builds the set of sequence names that occur in the distance file and, if
// the list file holds a different number of sequences, writes a reduced copy
// of the list file that keeps only those names.
//
// Name collection:
//   - format == "phylip": reads the leading sequence count (the program exits
//     with an [ERROR] message if that token is not all digits), then takes
//     the first token of each of that many rows as a name.
//   - otherwise: reads "nameA nameB distance" triples until the stream fails
//     and records both names of every triple.
//
// List filtering: every list vector read from listFile is rewritten with only
// the names found above; bins left without names are dropped, and vectors
// left without bins are not written. Output goes to listFile + ".temp".
//
// Returns the temp file name when at least one vector was written. Returns ""
// (after removing the temp file where one was opened) when control_pressed is
// set, when a list vector already has exactly as many sequences as there are
// unique names, or when nothing was written.
//
// NOTE(review): the inline comments that mention an ".accnos file" do not
// match this code -- the set being consulted is uniqueNames.
// NOTE(review): the try block has no matching catch and the closing brace of
// the function is not visible in this view -- confirm nothing was lost here.
string SensSpecCommand::preProcessList(){ try { set<string> uniqueNames; //get unique names from distance file if (format == "phylip") { ifstream phylipFile; m->openInputFile(distFile, phylipFile); string numTest; int pNumSeqs; phylipFile >> numTest; m->gobble(phylipFile); if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } else { m->mothurConvert(numTest, pNumSeqs); } string seqName; for(int i=0;i<pNumSeqs;i++){ if (m->control_pressed) { return ""; } phylipFile >> seqName; m->getline(phylipFile); m->gobble(phylipFile); uniqueNames.insert(seqName); } phylipFile.close(); }else { ifstream columnFile; m->openInputFile(distFile, columnFile); string seqNameA, seqNameB; double distance; while(columnFile){ if (m->control_pressed) { return ""; } columnFile >> seqNameA >> seqNameB >> distance; uniqueNames.insert(seqNameA); uniqueNames.insert(seqNameB); m->gobble(columnFile); } columnFile.close(); } //read list file, if numSeqs > unique names then remove redundant names string newListFile = listFile + ".temp"; ofstream out; m->openOutputFile(newListFile, out); ifstream in; m->openInputFile(listFile, in); bool wroteSomething = false; while(!in.eof()){ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; } //read in list vector ListVector list(in); //listfile is already unique if (list.getNumSeqs() == uniqueNames.size()) { in.close(); out.close(); m->mothurRemove(newListFile); return ""; } //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); //for each bin for (int i = 0; i < list.getNumBins(); i++) { //parse out names that are in accnos file string binnames = list.get(i); vector<string> bnames; m->splitAtComma(binnames, bnames); string newNames = ""; for (int j = 0; j < bnames.size(); j++) { string name = bnames[j]; //if that name is in the .accnos file, add it if (uniqueNames.count(name) != 0) { newNames += 
name + ","; } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.print(out); } m->gobble(in); } in.close(); out.close(); if (wroteSomething) { return newListFile; } else { m->mothurRemove(newListFile); } return ""; }
//********************************************************************************************************************** int GetRAbundCommand::processList(ofstream& out){ try { CountTable ct; ct.readTable(countfile, false, false); InputData input(inputfile, format); ListVector* list = input.getListVector(); string lastLabel = list->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; if (m->control_pressed) { delete list; return 0; } while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if(allLines == 1 || labels.count(list->getLabel()) == 1){ m->mothurOut(list->getLabel()); m->mothurOutEndLine(); if (m->control_pressed) { delete list; return 0; } RAbundVector* rabund = new RAbundVector(); createRabund(ct, list, rabund); if(sorted) { rabund->print(out); } else { rabund->nonSortedPrint(out); } delete rabund; processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; list = input.getListVector(lastLabel); m->mothurOut(list->getLabel()); m->mothurOutEndLine(); if (m->control_pressed) { delete list; return 0; } RAbundVector* rabund = new RAbundVector(); createRabund(ct, list, rabund); if(sorted) { rabund->print(out); } else { rabund->nonSortedPrint(out); } delete rabund; processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) 
!= 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { if (list != NULL) { delete list; } list = input.getListVector(lastLabel); m->mothurOut(list->getLabel()); m->mothurOutEndLine(); if (m->control_pressed) { delete list; return 0; } RAbundVector* rabund = new RAbundVector(); createRabund(ct, list, rabund); if(sorted) { rabund->print(out); } else { rabund->nonSortedPrint(out); } delete rabund; delete list; } return 0; } catch(exception& e) { m->errorOut(e, "GetRAbundCommand", "processList"); exit(1); } }
//********************************************************************************************************************** //returns a vector of listVectors where "little" vector is first vector<ListVector> OtuHierarchyCommand::getListVectors() { try { int pos; //to use in smart distancing, position of last read in file int lastPos; vector<ListVector> lists; //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; //open file ifstream in; m->openInputFile(listFile, in); //get first list vector in file ListVector* list = NULL; string lastLabel = ""; if (!in.eof()) { pos = in.tellg(); lastPos = pos; list = new ListVector(in); m->gobble(in); lastLabel = list->getLabel(); } while ((list != NULL) && (userLabels.size() != 0)) { if (m->control_pressed) { in.close(); delete list; return lists; } //is this a listvector that we want? if(labels.count(list->getLabel()) == 1){ //make copy of listvector ListVector temp(*list); lists.push_back(temp); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); } //you have a label the user want that is smaller than this label and the last label has not already been processed if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); int savePos = in.tellg(); //get smart distance line delete list; in.seekg(lastPos); if (!in.eof()) { list = new ListVector(in); }else { list = NULL; } //make copy of listvector ListVector temp(*list); lists.push_back(temp); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); in.seekg(savePos); } lastLabel = list->getLabel(); lastPos = pos; //get next line delete list; if (!in.eof()) { pos = in.tellg(); list = new ListVector(in); m->gobble(in); }else { list = NULL; } } if (m->control_pressed) { 
in.close(); return lists; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } if (m->control_pressed) { in.close(); return lists; } //run last label if you need to if (needToRun == true) { if (list != NULL) { delete list; } in.seekg(lastPos); if (!in.eof()) { list = new ListVector(in); //make copy of listvector ListVector temp(*list); lists.push_back(temp); delete list; } } in.close(); return lists; } catch(exception& e) { m->errorOut(e, "OtuHierarchyCommand", "getListVectors"); exit(1); } }
//********************************************************************************************************************** int RemoveRareCommand::processList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile); string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile); ofstream out, outGroup; m->openOutputFile(outputFileName, out); bool wroteSomething = false; //you must provide a label because the names in the listfile need to be consistent string thisLabel = ""; if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); } else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); } else { thisLabel = *labels.begin(); } InputData input(listfile, "list"); ListVector* list = input.getListVector(); //get first one or the one we want if (thisLabel != "") { //use smart distancing set<string> userLabels; userLabels.insert(thisLabel); set<string> processedLabels; string lastLabel = list->getLabel(); while((list != NULL) && (userLabels.size() != 0)) { if(userLabels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); delete list; list = input.getListVector(lastLabel); break; } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } if (userLabels.size() != 0) { m->mothurOut("Your file does not include the label " + thisLabel + ". 
I will use " + lastLabel + "."); m->mothurOutEndLine(); list = input.getListVector(lastLabel); } } //if groupfile is given then use it GroupMap* groupMap; if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); SharedUtil util; vector<string> namesGroups = groupMap->getNamesOfGroups(); util.setGroups(Groups, namesGroups); m->openOutputFile(outputGroupFileName, outGroup); } if (list != NULL) { //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close(); m->mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list->get(i); vector<string> names; string saveBinNames = binnames; m->splitAtComma(binnames, names); vector<string> newGroupFile; if (groupfile != "") { vector<string> newNames; saveBinNames = ""; for(int k = 0; k < names.size(); k++) { string group = groupMap->getGroup(names[k]); if (m->inUsersGroups(group, Groups)) { newGroupFile.push_back(names[k] + "\t" + group); newNames.push_back(names[k]); saveBinNames += names[k] + ","; } } names = newNames; saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); } if (names.size() > nseqs) { //keep bin newList.push_back(saveBinNames); for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.print(out); } } out.close(); if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); } if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); } outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, 
"RemoveRareCommand", "processList"); exit(1); } }
//********************************************************************************************************************** int GetSeqsCommand::readList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); ifstream in; m->openInputFile(listfile, in); bool wroteSomething = false; int selectedCount = 0; if (m->debug) { set<string> temp; sanity["list"] = temp; } while(!in.eof()){ selectedCount = 0; //read in list vector ListVector list(in); //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); variables["[distance]"] = list.getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; m->openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); vector<string> binLabels = list.getLabels(); vector<string> newBinLabels; if (m->control_pressed) { in.close(); out.close(); return 0; } //for each bin for (int i = 0; i < list.getNumBins(); i++) { //parse out names that are in accnos file string binnames = list.get(i); vector<string> bnames; m->splitAtComma(binnames, bnames); string newNames = ""; for (int j = 0; j < bnames.size(); j++) { string name = bnames[j]; //if that name is in the .accnos file, add it if (names.count(name) != 0) { newNames += name + ","; selectedCount++; if (m->debug) { sanity["list"].insert(name); } } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.printHeaders(out); newList.print(out); } m->gobble(in); out.close(); } in.close(); if 
(wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); } m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readList"); exit(1); } }
//********************************************************************************************************************** int RemoveGroupsCommand::readList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); ifstream in; m->openInputFile(listfile, in); bool wroteSomething = false; int removedCount = 0; while(!in.eof()){ removedCount = 0; //read in list vector ListVector list(in); variables["[tag]"] = list.getLabel(); string outputFileName = getOutputFileName("list", variables); ofstream out; m->openOutputFile(outputFileName, out); outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); vector<string> binLabels = list.getLabels(); vector<string> newBinLabels; //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); //for each bin for (int i = 0; i < list.getNumBins(); i++) { if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list.get(i); string newNames = ""; while (binnames.find_first_of(',') != -1) { string name = binnames.substr(0,binnames.find_first_of(',')); binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); //if that name is in the .accnos file, add it if (names.count(name) == 0) { newNames += name + ","; } else { //if you are not in the accnos file check if you are a name that needs to be changed map<string, string>::iterator it = uniqueToRedundant.find(name); if (it != uniqueToRedundant.end()) { newNames += it->second + ","; }else { removedCount++; } } } //get last name if (names.count(binnames) == 0) { newNames += binnames + ","; } else { //if you are not in the accnos file check if you are a name that needs to be changed map<string, string>::iterator it 
= uniqueToRedundant.find(binnames); if (it != uniqueToRedundant.end()) { newNames += it->second + ","; }else { removedCount++; } } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); newBinLabels.push_back(binLabels[i]); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.printHeaders(out); newList.print(out); } m->gobble(in); out.close(); } in.close(); if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); } m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "readList"); exit(1); } }
//********************************************************************************************************************** int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){ try { //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); int numOtus = 0; //for each bin vector<string> binLabels = list->getLabels(); vector<string> newBinLabels; for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { return 0; } //parse out names that are in accnos file string binnames = list->get(i); bool keepBin = false; string groupFileOutput = ""; //parse names string individual = ""; int length = binnames.length(); for(int j=0;j<length;j++){ if(binnames[j] == ','){ string group = groupMap->getGroup(individual); if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; } if (m->inUsersGroups(group, Groups)) { keepBin = true; } groupFileOutput += individual + "\t" + group + "\n"; individual = ""; } else{ individual += binnames[j]; } } string group = groupMap->getGroup(individual); if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. 
please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; } if (m->inUsersGroups(group, Groups)) { keepBin = true; } groupFileOutput += individual + "\t" + group + "\n"; //if there are sequences from the groups we want in this bin add to new list, output to groupfile if (keepBin) { newList.push_back(binnames); newBinLabels.push_back(binLabels[i]); outGroup << groupFileOutput; numOtus++; } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newBinLabels); newList.printHeaders(out); newList.print(out); } m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "processList"); exit(1); } }
//********************************************************************************************************************** int GetOtusCommand::readListGroup(){ try { InputData* input = new InputData(listfile, "list"); ListVector* list = input->getListVector(); string lastLabel = list->getLabel(); //using first label seen if none is provided if (label == "") { label = lastLabel; } string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[tag]"] = label; variables["[extension]"] = m->getExtension(listfile); string outputFileName = getOutputFileName("list", variables); ofstream out; m->openOutputFile(outputFileName, out); string GroupOutputDir = outputDir; if (outputDir == "") { GroupOutputDir += m->hasPath(groupfile); } variables["[filename]"] = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)); variables["[extension]"] = m->getExtension(groupfile); string outputGroupFileName = getOutputFileName("group", variables); ofstream outGroup; m->openOutputFile(outputGroupFileName, outGroup); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set<string> labels; labels.insert(label); set<string> processedLabels; set<string> userLabels = labels; bool wroteSomething = false; //as long as you are not at the end of the file or done wih the lines you want while((list != NULL) && (userLabels.size() != 0)) { if (m->control_pressed) { delete list; delete input; out.close(); outGroup.close(); m->mothurRemove(outputFileName); m->mothurRemove(outputGroupFileName);return 0; } if(labels.count(list->getLabel()) == 1){ processList(list, groupMap, out, outGroup, wroteSomething); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = list->getLabel(); delete list; list = input->getListVector(lastLabel); processList(list, groupMap, out, outGroup, wroteSomething); processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); //restore real lastlabel to save below list->setLabel(saveLabel); } lastLabel = list->getLabel(); delete list; list = NULL; //get next line to process list = input->getListVector(); } if (m->control_pressed) { if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); m->mothurRemove(outputFileName); m->mothurRemove(outputGroupFileName); return 0; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". 
Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { if (list != NULL) { delete list; } list = input->getListVector(lastLabel); processList(list, groupMap, out, outGroup, wroteSomething); delete list; list = NULL; } out.close(); outGroup.close(); if (wroteSomething == false) { m->mothurOut("At distance " + label + " your file does NOT contain any otus containing sequences from the groups you wish to get."); m->mothurOutEndLine(); } outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); return 0; } catch(exception& e) { m->errorOut(e, "GetOtusCommand", "readList"); exit(1); } }
//********************************************************************************************************************** int MGClusterCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //read names file if (namefile != "") { nameMap = new NameAssignment(namefile); nameMap->readMap(); }else{ nameMap= new NameAssignment(); } string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile)); string tag = ""; time_t start; float previousDist = 0.00000; float rndPreviousDist = 0.00000; //read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector //must remember to delete those objects here since readBlast does not read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted); read->read(nameMap); list = new ListVector(nameMap->getListVector()); RAbundVector* rabund = new RAbundVector(list->getRAbundVector()); if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; } start = time(NULL); oldList = *list; map<string, int> Seq2Bin; map<string, int> oldSeq2Bin; if (method == "furthest") { tag = "fn"; } else if (method == "nearest") { tag = "nn"; } else { tag = "an"; } //open output files m->openOutputFile(fileroot+ tag + ".list", listFile); m->openOutputFile(fileroot+ tag + ".rabund", rabundFile); m->openOutputFile(fileroot+ tag + ".sabund", sabundFile); if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } double saveCutoff = cutoff; if (!hclusterWanted) { //get distmatrix and overlap SparseMatrix* distMatrix = read->getDistMatrix(); overlapMatrix = read->getOverlapMatrix(); //already sorted by read delete read; //create cluster if (method == "furthest") { cluster 
= new CompleteLinkage(rabund, list, distMatrix, cutoff, method); } else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); } else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); } cluster->setMapWanted(true); Seq2Bin = cluster->getSeqtoBin(); oldSeq2Bin = Seq2Bin; if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } //cluster using cluster classes while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){ cluster->update(cutoff); if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } float dist = distMatrix->getSmallDist(); float rndDist; if (hard) { rndDist = m->ceilDist(dist, precision); }else{ rndDist = m->roundDist(dist, precision); } if(previousDist <= 0.0000 && dist != previousDist){ oldList.setLabel("unique"); printData(&oldList); } else if(rndDist != rndPreviousDist){ if (merge) { ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist); if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } temp->setLabel(toString(rndPreviousDist, precisionLength-1)); printData(temp); delete temp; }else{ 
oldList.setLabel(toString(rndPreviousDist, precisionLength-1)); printData(&oldList); } } previousDist = dist; rndPreviousDist = rndDist; oldList = *list; Seq2Bin = cluster->getSeqtoBin(); oldSeq2Bin = Seq2Bin; } if(previousDist <= 0.0000){ oldList.setLabel("unique"); printData(&oldList); } else if(rndPreviousDist<cutoff){ if (merge) { ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist); if (m->control_pressed) { delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } temp->setLabel(toString(rndPreviousDist, precisionLength-1)); printData(temp); delete temp; }else{ oldList.setLabel(toString(rndPreviousDist, precisionLength-1)); printData(&oldList); } } //free memory overlapMatrix.clear(); delete distMatrix; delete cluster; }else { //use hcluster to cluster //get distmatrix and overlap overlapFile = read->getOverlapFile(); distFile = read->getDistFile(); delete read; //sort the distance and overlap files sortHclusterFiles(distFile, overlapFile); if (m->control_pressed) { delete nameMap; delete list; delete rabund; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } //create cluster hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff); hcluster->setMapWanted(true); Seq2Bin = cluster->getSeqtoBin(); oldSeq2Bin = Seq2Bin; vector<seqDist> seqs; seqs.resize(1); // to start loop //ifstream inHcluster; //m->openInputFile(distFile, inHcluster); if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + 
".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } while (seqs.size() != 0){ seqs = hcluster->getSeqs(); //to account for cutoff change in average neighbor if (seqs.size() != 0) { if (seqs[0].dist > cutoff) { break; } } if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } for (int i = 0; i < seqs.size(); i++) { //-1 means skip me if (seqs[i].seq1 != seqs[i].seq2) { cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist); if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } float rndDist; if (hard) { rndDist = m->ceilDist(seqs[i].dist, precision); }else{ rndDist = m->roundDist(seqs[i].dist, precision); } if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){ oldList.setLabel("unique"); printData(&oldList); } else if((rndDist != rndPreviousDist)){ if (merge) { ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist); if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } temp->setLabel(toString(rndPreviousDist, 
precisionLength-1)); printData(temp); delete temp; }else{ oldList.setLabel(toString(rndPreviousDist, precisionLength-1)); printData(&oldList); } } previousDist = seqs[i].dist; rndPreviousDist = rndDist; oldList = *list; Seq2Bin = cluster->getSeqtoBin(); oldSeq2Bin = Seq2Bin; } } } //inHcluster.close(); if(previousDist <= 0.0000){ oldList.setLabel("unique"); printData(&oldList); } else if(rndPreviousDist<cutoff){ if (merge) { ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist); if (m->control_pressed) { delete nameMap; delete list; delete rabund; delete hcluster; delete temp; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); m->mothurRemove(distFile); m->mothurRemove(overlapFile); outputTypes.clear(); return 0; } temp->setLabel(toString(rndPreviousDist, precisionLength-1)); printData(temp); delete temp; }else{ oldList.setLabel(toString(rndPreviousDist, precisionLength-1)); printData(&oldList); } } delete hcluster; m->mothurRemove(distFile); m->mothurRemove(overlapFile); } delete list; delete rabund; listFile.close(); sabundFile.close(); rabundFile.close(); if (m->control_pressed) { delete nameMap; listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); outputTypes.clear(); return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list"); m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund"); m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); 
outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); if (saveCutoff != cutoff) { if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); } else { saveCutoff = m->roundDist(saveCutoff, precision); } m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); } //set list file as new current listfile string current = ""; itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } } //set rabund file as new current rabundfile itTypes = outputTypes.find("rabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); } } //set sabund file as new current sabundfile itTypes = outputTypes.find("sabund"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); } } m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "MGClusterCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int GetSeqsCommand::readList(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile); ofstream out; m->openOutputFile(outputFileName, out); ifstream in; m->openInputFile(listfile, in); bool wroteSomething = false; int selectedCount = 0; while(!in.eof()){ selectedCount = 0; if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; } //read in list vector ListVector list(in); //make a new list vector ListVector newList; newList.setLabel(list.getLabel()); //for each bin for (int i = 0; i < list.getNumBins(); i++) { //parse out names that are in accnos file string binnames = list.get(i); string newNames = ""; while (binnames.find_first_of(',') != -1) { string name = binnames.substr(0,binnames.find_first_of(',')); binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length()); //if that name is in the .accnos file, add it if (names.count(name) != 0) { newNames += name + ","; selectedCount++; } } //get last name if (names.count(binnames) != 0) { newNames += binnames + ","; selectedCount++; } //if there are names in this bin add to new list if (newNames != "") { newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma newList.push_back(newNames); } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.print(out); } m->gobble(in); } in.close(); out.close(); if (wroteSomething == false) { m->mothurOut("Your file does not contain any sequence from the .accnos file."); m->mothurOutEndLine(); } outputNames.push_back(outputFileName); outputTypes["list"].push_back(outputFileName); m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine(); 
return 0; } catch(exception& e) { m->errorOut(e, "GetSeqsCommand", "readList"); exit(1); } }