//**********************************************************************************************************************
int RemoveRareCommand::processList(){
    try {
        string thisOutputDir = outputDir;
        if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
        string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
        string outputGroupFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);

        ofstream out, outGroup;
        m->openOutputFile(outputFileName, out);

        bool wroteSomething = false;

        //you must provide a label because the names in the listfile need to be consistent
        string thisLabel = "";
        if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); }
        else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); }
        else { thisLabel = *labels.begin(); }

        InputData input(listfile, "list");
        ListVector* list = input.getListVector();

        //get first one or the one we want
        if (thisLabel != "") {
            //use smart distancing
            set<string> userLabels; userLabels.insert(thisLabel);
            set<string> processedLabels;
            string lastLabel = list->getLabel();

            while((list != NULL) && (userLabels.size() != 0)) {
                if(userLabels.count(list->getLabel()) == 1){
                    processedLabels.insert(list->getLabel());
                    userLabels.erase(list->getLabel());
                    break;
                }

                if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                    processedLabels.insert(list->getLabel());
                    userLabels.erase(list->getLabel());
                    delete list;
                    list = input.getListVector(lastLabel);
                    break;
                }

                lastLabel = list->getLabel();
                delete list;
                list = input.getListVector();
            }

            if (userLabels.size() != 0) {
                m->mothurOut("Your file does not include the label " + thisLabel + ". I will use " + lastLabel + "."); m->mothurOutEndLine();
                list = input.getListVector(lastLabel);
            }
        }

        //if groupfile is given then use it
        GroupMap* groupMap = NULL;
        if (groupfile != "") {
            groupMap = new GroupMap(groupfile);
            groupMap->readMap();
            SharedUtil util;
            vector<string> namesGroups = groupMap->getNamesOfGroups();
            util.setGroups(Groups, namesGroups);
            m->openOutputFile(outputGroupFileName, outGroup);
        }

        if (list != NULL) {
            //make a new list vector
            ListVector newList;
            newList.setLabel(list->getLabel());

            //for each bin
            for (int i = 0; i < list->getNumBins(); i++) {
                if (m->control_pressed) {
                    if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); }
                    out.close(); m->mothurRemove(outputFileName);
                    return 0;
                }

                //parse out names that are in accnos file
                string binnames = list->get(i);
                vector<string> names;
                string saveBinNames = binnames;
                m->splitAtComma(binnames, names);

                vector<string> newGroupFile;
                if (groupfile != "") {
                    vector<string> newNames;
                    saveBinNames = "";
                    for(int k = 0; k < names.size(); k++) {
                        string group = groupMap->getGroup(names[k]);
                        if (m->inUsersGroups(group, Groups)) {
                            newGroupFile.push_back(names[k] + "\t" + group);
                            newNames.push_back(names[k]);
                            saveBinNames += names[k] + ",";
                        }
                    }
                    names = newNames;
                    saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1);
                }

                if (names.size() > nseqs) { //keep bin
                    newList.push_back(saveBinNames);
                    for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; }
                }
            }

            //print new listvector
            if (newList.getNumBins() != 0) {
                wroteSomething = true;
                newList.print(out);
            }
        }

        out.close();
        if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); }

        if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); }
        outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);

        //clean up - the original returned without freeing the list vector or group map
        if (list != NULL) { delete list; }
        if (groupfile != "") { delete groupMap; }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "RemoveRareCommand", "processList");
        exit(1);
    }
}
int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
    try {
        if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }

        GroupMap* groupMap = NULL;
        int numGroups = 0;
        groups.clear(); totalGroups.clear(); indexGroupMap.clear(); indexNameMap.clear(); counts.clear();
        map<int, string> originalGroupIndexes;

        if (groupfile != "") {
            hasGroups = true;
            groupMap = new GroupMap(groupfile); groupMap->readMap();
            numGroups = groupMap->getNumGroups();
            groups = groupMap->getNamesOfGroups();
            totalGroups.resize(numGroups, 0);
        }else if(createGroup) {
            hasGroups = true;
            numGroups = 1;
            groups.push_back("Group1");
            totalGroups.resize(numGroups, 0);
        }

        //sort groups to keep consistent with how we store the groups in groupmap
        sort(groups.begin(), groups.end());
        for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
        m->setAllGroups(groups);

        bool error = false;
        string name;
        uniques = 0;
        total = 0; //note: the original also declared a local "int total = 0" here, shadowing the member and leaving it at zero

        //open input file
        ifstream in;
        m->openInputFile(namefile, in);

        while (!in.eof()) {
            if (m->control_pressed) { break; }

            string firstCol, secondCol;
            in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
            m->checkName(firstCol);
            m->checkName(secondCol);

            vector<string> names;
            m->splitAtChar(secondCol, names, ',');

            map<string, int> groupCounts;
            int thisTotal = 0;
            if (groupfile != "") {
                //set to 0
                for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }

                //get counts for each of the users groups
                for (int i = 0; i < names.size(); i++) {
                    string group = groupMap->getGroup(names[i]);
                    if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
                    else {
                        map<string, int>::iterator it = groupCounts.find(group);
                        //if not found, then this sequence is not from a group we care about
                        if (it != groupCounts.end()) { it->second++; thisTotal++; }
                    }
                }
            }else if (createGroup) {
                groupCounts["Group1"] = 0;
                for (int i = 0; i < names.size(); i++) { groupCounts["Group1"]++; thisTotal++; }
            }else { thisTotal = names.size(); }

            //if group info, then record it
            vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
            for (int i = 0; i < numGroups; i++) {
                thisGroupsCount[i] = groupCounts[groups[i]];
                totalGroups[i] += thisGroupsCount[i];
            }

            map<string, int>::iterator it = indexNameMap.find(firstCol);
            if (it == indexNameMap.end()) {
                if (hasGroups) { counts.push_back(thisGroupsCount); }
                indexNameMap[firstCol] = uniques;
                totals.push_back(thisTotal);
                total += thisTotal;
                uniques++;
            }else {
                error = true;
                m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
            }
        }
        in.close();

        if (error) { m->control_pressed = true; }
        else { //check for zero groups
            if (hasGroups) {
                for (int i = 0; i < totalGroups.size(); i++) {
                    if (totalGroups[i] == 0) { m->mothurOut("\nRemoving group: " + groups[i] + " because all sequences have been removed.\n"); removeGroup(groups[i]); i--; }
                }
            }
        }

        if (groupfile != "") { delete groupMap; }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "CountTable", "createTable");
        exit(1);
    }
}
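// Illustrative sketch (not from the mothur sources): how one name-file line
// ("representative<TAB>name1,name2,...") is tallied into per-group counts, mirroring the loop above.
// The nameToGroup map is a hypothetical stand-in for GroupMap::getGroup(); names whose group is
// unknown are skipped, matching the "not from a group we care about" branch.
#include <map>
#include <sstream>
#include <string>

static std::map<std::string, int> tallyLine(const std::string& secondCol,
                                            const std::map<std::string, std::string>& nameToGroup) {
    std::map<std::string, int> groupCounts;
    std::stringstream ss(secondCol);
    std::string name;
    while (std::getline(ss, name, ',')) {
        std::map<std::string, std::string>::const_iterator it = nameToGroup.find(name);
        if (it != nameToGroup.end()) { groupCounts[it->second]++; } // count this read toward its group
    }
    return groupCounts;
}
// e.g. secondCol = "seqA,seqB,seqC" with seqA,seqB mapped to "F003" and seqC to "F004"
// yields groupCounts = { "F003": 2, "F004": 1 }.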
int ClassifySeqsCommand::execute(){
    try {
        if (abort) { if (calledHelp) { return 0; } return 2; }

        string outputMethodTag = method;
        if(method == "wang"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion()); }
        else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, util.getRandomNumber(), current->getVersion()); }
        else if(method == "zap"){
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") { classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else { classify = new AlignTree(templateFileName, taxonomyFileName, cutoff); }
        }
        else {
            m->mothurOut(search + " is not a valid method option. I will run the command using wang."); m->mothurOutEndLine();
            classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion());
        }

        if (m->getControl_pressed()) { delete classify; return 0; }

        m->mothurOut("Classifying sequences from " + fastafile + " ...\n" );

        string baseTName = util.getSimpleName(taxonomyFileName);

        //strip the extension from the taxonomy file name to build the [tag] used in output names
        string RippedTaxName = "";
        bool foundDot = false;
        for (int i = baseTName.length()-1; i >= 0; i--) {
            if (foundDot && (baseTName[i] != '.')) { RippedTaxName = baseTName[i] + RippedTaxName; }
            else if (foundDot && (baseTName[i] == '.')) { break; }
            else if (!foundDot && (baseTName[i] == '.')) { foundDot = true; }
        }

        if (outputDir == "") { outputDir += util.hasPath(fastafile); }
        map<string, string> variables;
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
        variables["[tag]"] = RippedTaxName;
        variables["[tag2]"] = outputMethodTag;
        string newTaxonomyFile = getOutputFileName("taxonomy", variables);
        string newaccnosFile = getOutputFileName("accnos", variables);
        string tempTaxonomyFile = outputDir + util.getRootName(util.getSimpleName(fastafile)) + "taxonomy.temp";
        string taxSummary = getOutputFileName("taxsummary", variables);

        if ((method == "knn") && (search == "distance")) {
            string DistName = getOutputFileName("matchdist", variables);
            classify->setDistName(DistName);
            outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
        }

        outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
        outputNames.push_back(taxSummary); outputTypes["taxsummary"].push_back(taxSummary);

        long start = time(NULL);
        int numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastafile);

        if (!util.isBlank(newaccnosFile)) {
            m->mothurOut("\n[WARNING]: mothur reversed some of your sequences for a better classification. If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences.\n");
            outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
        }else { util.mothurRemove(newaccnosFile); }

        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.\n\n");
        start = time(NULL);

        //read namefile
        map<string, vector<string> > nameMap;
        map<string, vector<string> >::iterator itNames;
        if(namefile != "") {
            m->mothurOut("Reading " + namefile + "..."); cout.flush();
            nameMap.clear(); //remove old names
            util.readNames(namefile, nameMap);
            m->mothurOut(" Done.\n");
        }

        //output taxonomy with the unclassified bins added
        ifstream inTax;
        util.openInputFile(newTaxonomyFile, inTax);

        ofstream outTax;
        string unclass = newTaxonomyFile + ".unclass.temp";
        util.openOutputFile(unclass, outTax);

        //get maxLevel from phylotree so you know how many 'unclassified's to add
        int maxLevel = classify->getMaxLevel();

        //read taxfile - this reading and rewriting is done to preserve the confidence scores.
        string name, taxon;
        GroupMap* groupMap = NULL;
        CountTable* ct = NULL;
        PhyloSummary* taxaSum;

        if (hasCount) {
            ct = new CountTable();
            ct->readTable(countfile, true, false);
            taxaSum = new PhyloSummary(ct, relabund, printlevel);
        }else {
            if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); }
            taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
        }

        while (!inTax.eof()) {
            if (m->getControl_pressed()) {
                outputTypes.clear();
                if (ct != NULL) { delete ct; }
                if (groupMap != NULL) { delete groupMap; }
                delete taxaSum;
                for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); }
                delete classify;
                return 0;
            }

            inTax >> name; util.gobble(inTax);
            taxon = util.getline(inTax); util.gobble(inTax);

            string newTax = util.addUnclassifieds(taxon, maxLevel, probs);
            outTax << name << '\t' << newTax << endl;

            if (namefile != "") {
                itNames = nameMap.find(name);
                if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file please correct.\n"); exit(1); }
                else{
                    //add it as many times as there are identical seqs
                    for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); }
                    itNames->second.clear();
                    nameMap.erase(itNames->first);
                }
            }else { taxaSum->addSeqToTree(name, newTax); }
        }
        inTax.close();
        outTax.close();

        util.mothurRemove(newTaxonomyFile);
        util.renameFile(unclass, newTaxonomyFile);

        if (m->getControl_pressed()) {
            outputTypes.clear();
            if (ct != NULL) { delete ct; }
            if (groupMap != NULL) { delete groupMap; }
            delete taxaSum; //the original leaked taxaSum on this early-exit path
            for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); }
            delete classify;
            return 0;
        }

        //print summary file
        ofstream outTaxTree;
        util.openOutputFile(taxSummary, outTaxTree);
        taxaSum->print(outTaxTree, output);
        outTaxTree.close();

        if (ct != NULL) { delete ct; }
        if (groupMap != NULL) { delete groupMap; }
        delete taxaSum;
        util.mothurRemove(tempTaxonomyFile);
        delete classify;

        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences.\n\n");

        m->mothurOut("\nOutput File Names: \n");
        for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
        m->mothurOutEndLine();

        //set taxonomy file as new current taxonomyfile
        string currentName = "";
        itTypes = outputTypes.find("taxonomy");
        if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } }

        currentName = "";
        itTypes = outputTypes.find("accnos");
        if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "ClassifySeqsCommand", "execute");
        exit(1);
    }
}
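// Illustrative sketch (not part of mothur): what the reverse scan over baseTName computes. It pulls
// out the dot-separated token that sits just before the final extension of the taxonomy file name,
// which becomes the [tag] substituted into the output file names above.
#include <string>

static std::string ripTaxName(const std::string& baseTName) {
    std::string::size_type lastDot = baseTName.rfind('.');
    if (lastDot == std::string::npos || lastDot == 0) { return ""; } // no extension -> no tag
    std::string::size_type prevDot = baseTName.rfind('.', lastDot - 1);
    std::string::size_type start = (prevDot == std::string::npos) ? 0 : prevDot + 1;
    return baseTName.substr(start, lastDot - start);
}
// e.g. ripTaxName("trainset9_032012.pds.tax") == "pds" and ripTaxName("silva.tax") == "silva".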
//**********************************************************************************************************************
int SharedCommand::createSharedFromListGroup() {
    try {
        GroupMap* groupMap = NULL;
        CountTable* countTable = NULL;
        pickedGroups = false;
        if (groupfile != "") {
            groupMap = new GroupMap(groupfile);
            int groupError = groupMap->readMap();
            if (groupError == 1) { delete groupMap; return 0; }
            vector<string> allGroups = groupMap->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }else{
            countTable = new CountTable();
            countTable->readTable(countfile, true, false);
            vector<string> allGroups = countTable->getNamesOfGroups();
            if (Groups.size() == 0) { Groups = allGroups; }
            else { pickedGroups = true; }
        }
        int numGroups = Groups.size();
        if (m->getControl_pressed()) { return 0; }

        ofstream out;
        string filename = "";
        if (!pickedGroups) {
            filename = listfile; //note: the original redeclared "string filename" here, shadowing the variable removed on cancel
            if (outputDir == "") { outputDir += util.hasPath(filename); }
            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
            filename = getOutputFileName("shared",variables);
            outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
            util.openOutputFile(filename, out);
        }

        //set fileroot
        fileroot = outputDir + util.getRootName(util.getSimpleName(listfile));
        map<string, string> variables;
        variables["[filename]"] = fileroot;
        string errorOff = "no error";

        InputData input(listfile, "shared", Groups);
        SharedListVector* SharedList = input.getSharedListVector();
        string lastLabel = SharedList->getLabel();
        SharedRAbundVectors* lookup;

        if (m->getControl_pressed()) {
            delete SharedList;
            if (groupMap != NULL) { delete groupMap; }
            if (countTable != NULL) { delete countTable; }
            out.close();
            if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }

        //sanity check
        vector<string> namesSeqs;
        int numGroupNames = 0;
        if (current->getGroupMode() == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); }
        else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); }
        int error = ListGroupSameSeqs(namesSeqs, SharedList);

        if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) {
            //if the user has not specified any groups and their files don't match exit with error
            m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct.\n");
            m->setControl_pressed(true);
            out.close();
            if (!pickedGroups) { util.mothurRemove(filename); } //remove blank shared file you made

            //delete memory
            delete SharedList;
            if (groupMap != NULL) { delete groupMap; }
            if (countTable != NULL) { delete countTable; }
            return 0;
        }

        if (error == 1) { m->setControl_pressed(true); }

        //if user has specified groups make new groupfile for them
        if ((pickedGroups) && (current->getGroupMode() == "group")) { //make new group file
            string groups = "";
            if (numGroups < 4) {
                for (int i = 0; i < numGroups-1; i++) { groups += Groups[i] + "."; }
                groups += Groups[numGroups-1];
            }else { groups = "merge"; }

            map<string, string> variables;
            variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(listfile));
            variables["[group]"] = groups;
            string newGroupFile = getOutputFileName("group",variables);
            outputTypes["group"].push_back(newGroupFile);
            outputNames.push_back(newGroupFile);
            ofstream outGroups;
            util.openOutputFile(newGroupFile, outGroups);

            vector<string> names = groupMap->getNamesSeqs();
            string groupName;
            for (int i = 0; i < names.size(); i++) {
                groupName = groupMap->getGroup(names[i]);
                if (isValidGroup(groupName, Groups)) { outGroups << names[i] << '\t' << groupName << endl; }
            }
            outGroups.close();
        }

        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
        set<string> processedLabels;
        set<string> userLabels = labels;
        bool printHeaders = true;

        while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            if (m->getControl_pressed()) {
                delete SharedList;
                if (groupMap != NULL) { delete groupMap; }
                if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){
                lookup = SharedList->getSharedRAbundVector();
                m->mothurOut(lookup->getLabel()+"\n");

                if (m->getControl_pressed()) {
                    delete SharedList;
                    if (groupMap != NULL) { delete groupMap; }
                    if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }
                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);
                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else { printSharedData(lookup, out, printHeaders); } //prints info to the .shared file
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());
            }

            if ((util.anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) ) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = SharedList->getLabel();

                delete SharedList;
                SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

                lookup = SharedList->getSharedRAbundVector();
                m->mothurOut(lookup->getLabel()+"\n");

                if (m->getControl_pressed()) {
                    delete SharedList;
                    if (groupMap != NULL) { delete groupMap; }
                    if (countTable != NULL) { delete countTable; }
                    delete lookup;
                    if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                    return 0;
                }

                //if picked groups must split the shared file by label
                if (pickedGroups) {
                    string filename = listfile;
                    if (outputDir == "") { outputDir += util.hasPath(filename); }
                    map<string, string> variables;
                    variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                    variables["[distance]"] = lookup->getLabel();
                    filename = getOutputFileName("shared",variables);
                    outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                    ofstream out2;
                    util.openOutputFile(filename, out2);
                    lookup->eliminateZeroOTUS();
                    printSharedData(lookup, out2, printHeaders);
                    out2.close();
                }else { printSharedData(lookup, out, printHeaders); } //prints info to the .shared file
                delete lookup;

                processedLabels.insert(SharedList->getLabel());
                userLabels.erase(SharedList->getLabel());

                //restore real lastlabel to save below
                SharedList->setLabel(saveLabel);
            }

            lastLabel = SharedList->getLabel();

            delete SharedList;
            SharedList = input.getSharedListVector(); //get new list vector to process
        }

        //output error messages about any remaining user labels
        set<string>::iterator it;
        bool needToRun = false;
        for (it = userLabels.begin(); it != userLabels.end(); it++) {
            if (processedLabels.count(lastLabel) != 1) { needToRun = true; }
        }

        //run last label if you need to
        if (needToRun ) {
            if (SharedList != NULL) { delete SharedList; }
            SharedList = input.getSharedListVector(lastLabel); //get new list vector to process

            lookup = SharedList->getSharedRAbundVector();
            m->mothurOut(lookup->getLabel()+"\n");

            if (m->getControl_pressed()) {
                if (groupMap != NULL) { delete groupMap; }
                if (countTable != NULL) { delete countTable; }
                if (!pickedGroups) { out.close(); util.mothurRemove(filename); }
                return 0;
            }

            //if picked groups must split the shared file by label
            if (pickedGroups) {
                string filename = listfile;
                if (outputDir == "") { outputDir += util.hasPath(filename); }
                map<string, string> variables;
                variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename));
                variables["[distance]"] = lookup->getLabel();
                filename = getOutputFileName("shared",variables);
                outputNames.push_back(filename); outputTypes["shared"].push_back(filename);
                ofstream out2;
                util.openOutputFile(filename, out2);
                lookup->eliminateZeroOTUS();
                printSharedData(lookup, out2, printHeaders);
                out2.close();
            }else { printSharedData(lookup, out, printHeaders); } //prints info to the .shared file
            delete lookup;
            delete SharedList;
        }

        if (!pickedGroups) { out.close(); }

        if (groupMap != NULL) { delete groupMap; }
        if (countTable != NULL) { delete countTable; }

        if (m->getControl_pressed()) {
            if (!pickedGroups) { util.mothurRemove(filename); }
            return 0;
        }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "SharedCommand", "createSharedFromListGroup");
        exit(1);
    }
}
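// Illustrative sketch (not part of mothur): the naming rule used above when the user has picked
// specific groups. Three or fewer selected groups are joined with '.' to form the [group] tag;
// four or more collapse to "merge" so the output file name stays short.
#include <string>
#include <vector>

static std::string groupTag(const std::vector<std::string>& Groups) {
    if (Groups.empty()) { return ""; }
    if (Groups.size() >= 4) { return "merge"; }
    std::string tag = Groups[0];
    for (size_t i = 1; i < Groups.size(); i++) { tag += "." + Groups[i]; }
    return tag;
}
// e.g. two picked groups "F003" and "F004" give the tag "F003.F004", while five picked groups give "merge".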
int ClassifySeqsCommand::execute(){
    try {
        if (abort == true) { if (calledHelp) { return 0; } return 2; }

        string outputMethodTag = method;
        if(method == "wang"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts); }
        else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); }
        else if(method == "zap"){
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") { classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else { classify = new AlignTree(templateFileName, taxonomyFileName, cutoff); }
        }
        else {
            m->mothurOut(search + " is not a valid method option. I will run the command using wang."); m->mothurOutEndLine();
            classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);
        }

        if (m->control_pressed) { delete classify; return 0; }

        for (int s = 0; s < fastaFileNames.size(); s++) {
            m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();

            string baseTName = m->getSimpleName(taxonomyFileName);

            //strip the extension from the taxonomy file name to build the [tag] used in output names
            string RippedTaxName = "";
            bool foundDot = false;
            for (int i = baseTName.length()-1; i >= 0; i--) {
                if (foundDot && (baseTName[i] != '.')) { RippedTaxName = baseTName[i] + RippedTaxName; }
                else if (foundDot && (baseTName[i] == '.')) { break; }
                else if (!foundDot && (baseTName[i] == '.')) { foundDot = true; }
            }
            //if (RippedTaxName != "") { RippedTaxName += "."; }

            if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
            map<string, string> variables;
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
            variables["[tag]"] = RippedTaxName;
            variables["[tag2]"] = outputMethodTag;
            string newTaxonomyFile = getOutputFileName("taxonomy", variables);
            string newaccnosFile = getOutputFileName("accnos", variables);
            string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
            string taxSummary = getOutputFileName("taxsummary", variables);

            if ((method == "knn") && (search == "distance")) {
                string DistName = getOutputFileName("matchdist", variables);
                classify->setDistName(DistName);
                outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
            }

            outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
            outputNames.push_back(taxSummary); outputTypes["taxsummary"].push_back(taxSummary);

            int start = time(NULL);
            int numFastaSeqs = 0;
            for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();

            vector<unsigned long long> positions;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
            positions = m->divideFile(fastaFileNames[s], processors);
            for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); }
#else
            if (processors == 1) { lines.push_back(new linePair(0, 1000)); }
            else {
                positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs);
                if (numFastaSeqs < processors) { processors = numFastaSeqs; }

                //figure out how many sequences you have to process
                int numSeqsPerProcessor = numFastaSeqs / processors;
                for (int i = 0; i < processors; i++) {
                    int startIndex = i * numSeqsPerProcessor;
                    if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; }
                    lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
                }
            }
#endif
            if(processors == 1){ numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); }
            else{ numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); }

            if (!m->isBlank(newaccnosFile)) {
                m->mothurOutEndLine();
                m->mothurOut("[WARNING]: mothur reversed some of your sequences for a better classification. If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine();
                outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
            }else { m->mothurRemove(newaccnosFile); }

            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
            start = time(NULL);

            //read namefile
            if(namefile != "") {
                m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
                nameMap.clear(); //remove old names
                m->readNames(namefileNames[s], nameMap);
                m->mothurOut(" Done."); m->mothurOutEndLine();
            }

            //output taxonomy with the unclassified bins added
            ifstream inTax;
            m->openInputFile(newTaxonomyFile, inTax);

            ofstream outTax;
            string unclass = newTaxonomyFile + ".unclass.temp";
            m->openOutputFile(unclass, outTax);

            //get maxLevel from phylotree so you know how many 'unclassified's to add
            int maxLevel = classify->getMaxLevel();

            //read taxfile - this reading and rewriting is done to preserve the confidence scores.
            string name, taxon;
            string group = "";
            GroupMap* groupMap = NULL;
            CountTable* ct = NULL;
            PhyloSummary* taxaSum;

            if (hasCount) {
                ct = new CountTable();
                ct->readTable(countfileNames[s], true, false);
                taxaSum = new PhyloSummary(ct, relabund, printlevel);
            }else {
                if (groupfile != "") { group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
                taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
            }

            while (!inTax.eof()) {
                if (m->control_pressed) {
                    outputTypes.clear();
                    if (ct != NULL) { delete ct; }
                    if (groupMap != NULL) { delete groupMap; }
                    delete taxaSum;
                    for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
                    delete classify;
                    return 0;
                }

                inTax >> name >> taxon; m->gobble(inTax);

                string newTax = m->addUnclassifieds(taxon, maxLevel, probs);
                outTax << name << '\t' << newTax << endl;

                if (namefile != "") {
                    itNames = nameMap.find(name);
                    if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1); }
                    else{
                        for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); } //add it as many times as there are identical seqs
                        itNames->second.clear();
                        nameMap.erase(itNames->first);
                    }
                }else { taxaSum->addSeqToTree(name, newTax); }
            }
            inTax.close();
            outTax.close();

            m->mothurRemove(newTaxonomyFile);
            rename(unclass.c_str(), newTaxonomyFile.c_str());

            if (m->control_pressed) {
                outputTypes.clear();
                if (ct != NULL) { delete ct; }
                if (groupMap != NULL) { delete groupMap; }
                for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }
                delete classify;
                return 0;
            }

            //print summary file
            ofstream outTaxTree;
            m->openOutputFile(taxSummary, outTaxTree);
            taxaSum->print(outTaxTree, output);
            outTaxTree.close();

            if (ct != NULL) { delete ct; }
            if (groupMap != NULL) { delete groupMap; }
            delete taxaSum;
            m->mothurRemove(tempTaxonomyFile);

            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
        }

        delete classify;

        m->mothurOutEndLine();
        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
        for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
        m->mothurOutEndLine();

        //set taxonomy file as new current taxonomyfile
        string current = "";
        itTypes = outputTypes.find("taxonomy");
        if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } }

        current = "";
        itTypes = outputTypes.find("accnos");
        if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } }

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "ClassifySeqsCommand", "execute");
        exit(1);
    }
}