//********************************************************************************************************************** int SharedCommand::createSharedFromListGroup() { try { GroupMap* groupMap = NULL; CountTable* countTable = NULL; pickedGroups = false; if (groupfile != "") { groupMap = new GroupMap(groupfile); int groupError = groupMap->readMap(); if (groupError == 1) { delete groupMap; return 0; } vector<string> allGroups = groupMap->getNamesOfGroups(); if (Groups.size() == 0) { Groups = allGroups; } else { pickedGroups = true; } }else{ countTable = new CountTable(); countTable->readTable(countfile, true, false); vector<string> allGroups = countTable->getNamesOfGroups(); if (Groups.size() == 0) { Groups = allGroups; } else { pickedGroups = true; } } int numGroups = Groups.size(); if (m->getControl_pressed()) { return 0; } ofstream out; string filename = ""; if (!pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); util.openOutputFile(filename, out); } //set fileroot fileroot = outputDir + util.getRootName(util.getSimpleName(listfile)); map<string, string> variables; variables["[filename]"] = fileroot; string errorOff = "no error"; InputData input(listfile, "shared", Groups); SharedListVector* SharedList = input.getSharedListVector(); string lastLabel = SharedList->getLabel(); SharedRAbundVectors* lookup; if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } out.close(); if (!pickedGroups) { util.mothurRemove(filename); } return 0; } //sanity check vector<string> namesSeqs; int numGroupNames = 0; if (current->getGroupMode() == "group") { namesSeqs = groupMap->getNamesSeqs(); numGroupNames = groupMap->getNumSeqs(); } else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); } int error = ListGroupSameSeqs(namesSeqs, SharedList); if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct.\n"); m->setControl_pressed(true); out.close(); if (!pickedGroups) { util.mothurRemove(filename); } //remove blank shared file you made //delete memory delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } return 0; } if (error == 1) { m->setControl_pressed(true); } //if user has specified groups make new groupfile for them if ((pickedGroups) && (current->getGroupMode() == "group")) { //make new group file string groups = ""; if (numGroups < 4) { for (int i = 0; i < numGroups-1; i++) { groups += Groups[i] + "."; } groups+=Groups[numGroups-1]; }else { groups = "merge"; } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(listfile)); variables["[group]"] = groups; string newGroupFile = getOutputFileName("group",variables); outputTypes["group"].push_back(newGroupFile); outputNames.push_back(newGroupFile); ofstream outGroups; util.openOutputFile(newGroupFile, outGroups); vector<string> names = groupMap->getNamesSeqs(); string groupName; for (int i = 0; i < names.size(); i++) { groupName = groupMap->getGroup(names[i]); if (isValidGroup(groupName, Groups)) { outGroups << names[i] << '\t' << groupName << endl; } } outGroups.close(); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; bool printHeaders = true; while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){ lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } delete lookup; if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; processedLabels.insert(SharedList->getLabel()); userLabels.erase(SharedList->getLabel()); } if ((util.anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = SharedList->getLabel(); delete SharedList; SharedList = input.getSharedListVector(lastLabel); //get new list vector to process lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } delete lookup; if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; processedLabels.insert(SharedList->getLabel()); userLabels.erase(SharedList->getLabel()); //restore real lastlabel to save below SharedList->setLabel(saveLabel); } lastLabel = SharedList->getLabel(); delete SharedList; SharedList = input.getSharedListVector(); //get new list vector to process } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { if (processedLabels.count(lastLabel) != 1) { needToRun = true; } } //run last label if you need to if (needToRun ) { if (SharedList != NULL) { delete SharedList; } SharedList = input.getSharedListVector(lastLabel); //get new list vector to process lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; delete SharedList; } if (!pickedGroups) { out.close(); } if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } if (m->getControl_pressed()) { if (!pickedGroups) { util.mothurRemove(filename); } return 0; } return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "createSharedFromListGroup"); exit(1); } }
int SeqSummaryCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } int start = time(NULL); //set current fasta to fastafile m->setFastaFile(fastafile); map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile)); string summaryFile = getOutputFileName("summary",variables); long long numSeqs = 0; long long size = 0; long long numUniques = 0; map<int, long long> startPosition; map<int, long long> endPosition; map<int, long long> seqLength; map<int, long long> ambigBases; map<int, long long> longHomoPolymer; if (namefile != "") { nameMap = m->readNames(namefile); numUniques = nameMap.size(); } else if (countfile != "") { CountTable ct; ct.readTable(countfile, false, false); nameMap = ct.getNameMap(); size = ct.getNumSeqs(); numUniques = ct.getNumUniqueSeqs(); } if (m->control_pressed) { return 0; } vector<unsigned long long> positions; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } #else positions = m->setFilePosFasta(fastafile, numSeqs); if (numSeqs < processors) { processors = numSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } #endif if(processors == 1){ numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]); }else{ numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); } if (m->control_pressed) { return 0; } //set size if (countfile != "") {}//already set else if (namefile == "") { size = numSeqs; } else { for (map<int, long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++) { size += it->second; } } if ((namefile != "") || (countfile != "")) { string type = "count"; if (namefile != "") { type = "name"; } if (numSeqs != numUniques) { // do fasta and name/count files match m->mothurOut("[ERROR]: Your " + type + " file contains " + toString(numUniques) + " unique sequences, but your fasta file contains " + toString(numSeqs) + ". File mismatch detected, quitting command.\n"); m->control_pressed = true; } } if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; } long long ptile0_25 = 1+(long long)(size * 0.025); //number of sequences at 2.5% long long ptile25 = 1+(long long)(size * 0.250); //number of sequences at 25% long long ptile50 = 1+(long long)(size * 0.500); long long ptile75 = 1+(long long)(size * 0.750); long long ptile97_5 = 1+(long long)(size * 0.975); long long ptile100 = (long long)(size); vector<int> starts; starts.resize(7,0); vector<int> ends; ends.resize(7,0); vector<int> ambigs; ambigs.resize(7,0); vector<int> lengths; lengths.resize(7,0); vector<int> homops; homops.resize(7,0); //find means long long meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer; meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0; //minimum if ((startPosition.begin())->first == -1) { starts[0] = 0; } else {starts[0] = (startPosition.begin())->first; } long long totalSoFar = 0; //set all values to min starts[1] = starts[0]; starts[2] = starts[0]; starts[3] = starts[0]; starts[4] = starts[0]; starts[5] = starts[0]; int lastValue = 0; for (map<int, long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++) { int value = it->first; if (value == -1) { value = 0; } meanStartPosition += (value*it->second); totalSoFar += it->second; if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){ starts[1] = value; } //save value if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) || ((lastValue < ptile25) && (totalSoFar > ptile25))) { starts[2] = value; } //save value if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) || ((lastValue < ptile50) && (totalSoFar > ptile50))) { starts[3] = value; } //save value if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) || ((lastValue < ptile75) && (totalSoFar > ptile75))) { starts[4] = value; } //save value if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) || ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) { starts[5] = value; } //save value if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) { starts[6] = value; } //save value lastValue = totalSoFar; } starts[6] = (startPosition.rbegin())->first; if ((endPosition.begin())->first == -1) { ends[0] = 0; } else {ends[0] = (endPosition.begin())->first; } totalSoFar = 0; //set all values to min ends[1] = ends[0]; ends[2] = ends[0]; ends[3] = ends[0]; ends[4] = ends[0]; ends[5] = ends[0]; lastValue = 0; for (map<int, long long>::iterator it = endPosition.begin(); it != endPosition.end(); it++) { int value = it->first; if (value == -1) { value = 0; } meanEndPosition += (value*it->second); totalSoFar += it->second; if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){ ends[1] = value; } //save value if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) || ((lastValue < ptile25) && (totalSoFar > ptile25))) { ends[2] = value; } //save value if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) || ((lastValue < ptile50) && (totalSoFar > ptile50))) { ends[3] = value; } //save value if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) || ((lastValue < ptile75) && (totalSoFar > ptile75))) { ends[4] = value; } //save value if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) || ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) { ends[5] = value; } //save value if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) { ends[6] = value; } //save value lastValue = totalSoFar; } ends[6] = (endPosition.rbegin())->first; if ((seqLength.begin())->first == -1) { lengths[0] = 0; } else {lengths[0] = (seqLength.begin())->first; } //set all values to min lengths[1] = lengths[0]; lengths[2] = lengths[0]; lengths[3] = lengths[0]; lengths[4] = lengths[0]; lengths[5] = lengths[0]; totalSoFar = 0; lastValue = 0; for (map<int, long long>::iterator it = seqLength.begin(); it != seqLength.end(); it++) { int value = it->first; meanSeqLength += (value*it->second); totalSoFar += it->second; if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){ lengths[1] = value; } //save value if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) || ((lastValue < ptile25) && (totalSoFar > ptile25))) { lengths[2] = value; } //save value if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) || ((lastValue < ptile50) && (totalSoFar > ptile50))) { lengths[3] = value; } //save value if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) || ((lastValue < ptile75) && (totalSoFar > ptile75))) { lengths[4] = value; } //save value if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) || ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) { lengths[5] = value; } //save value if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) { lengths[6] = value; } //save value lastValue = totalSoFar; } lengths[6] = (seqLength.rbegin())->first; if ((ambigBases.begin())->first == -1) { ambigs[0] = 0; } else {ambigs[0] = (ambigBases.begin())->first; } //set all values to min ambigs[1] = ambigs[0]; ambigs[2] = ambigs[0]; ambigs[3] = ambigs[0]; ambigs[4] = ambigs[0]; ambigs[5] = ambigs[0]; totalSoFar = 0; lastValue = 0; for (map<int, long long>::iterator it = ambigBases.begin(); it != ambigBases.end(); it++) { int value = it->first; meanAmbigBases += (value*it->second); totalSoFar += it->second; if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){ ambigs[1] = value; } //save value if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) || ((lastValue < ptile25) && (totalSoFar > ptile25))) { ambigs[2] = value; } //save value if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) || ((lastValue < ptile50) && (totalSoFar > ptile50))) { ambigs[3] = value; } //save value if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) || ((lastValue < ptile75) && (totalSoFar > ptile75))) { ambigs[4] = value; } //save value if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) || ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) { ambigs[5] = value; } //save value if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) { ambigs[6] = value; } //save value lastValue = totalSoFar; } ambigs[6] = (ambigBases.rbegin())->first; if ((longHomoPolymer.begin())->first == -1) { homops[0] = 0; } else {homops[0] = (longHomoPolymer.begin())->first; } //set all values to min homops[1] = homops[0]; homops[2] = homops[0]; homops[3] = homops[0]; homops[4] = homops[0]; homops[5] = homops[0]; totalSoFar = 0; lastValue = 0; for (map<int, long long>::iterator it = longHomoPolymer.begin(); it != longHomoPolymer.end(); it++) { int value = it->first; meanLongHomoPolymer += (it->first*it->second); totalSoFar += it->second; if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){ homops[1] = value; } //save value if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) || ((lastValue < ptile25) && (totalSoFar > ptile25))) { homops[2] = value; } //save value if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) || ((lastValue < ptile50) && (totalSoFar > ptile50))) { homops[3] = value; } //save value if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) || ((lastValue < ptile75) && (totalSoFar > ptile75))) { homops[4] = value; } //save value if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) || ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) { homops[5] = value; } //save value if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) { homops[6] = value; } //save value lastValue = totalSoFar; } homops[6] = (longHomoPolymer.rbegin())->first; double meanstartPosition, meanendPosition, meanseqLength, meanambigBases, meanlongHomoPolymer; meanstartPosition = meanStartPosition / (double) size; meanendPosition = meanEndPosition /(double) size; meanlongHomoPolymer = meanLongHomoPolymer / (double) size; meanseqLength = meanSeqLength / (double) size; meanambigBases = meanAmbigBases /(double) size; if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; } m->mothurOutEndLine(); m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer\tNumSeqs"); m->mothurOutEndLine(); m->mothurOut("Minimum:\t" + toString(starts[0]) + "\t" + toString(ends[0]) + "\t" + toString(lengths[0]) + "\t" + toString(ambigs[0]) + "\t" + toString(homops[0]) + "\t" + toString(1)); m->mothurOutEndLine(); m->mothurOut("2.5%-tile:\t" + toString(starts[1]) + "\t" + toString(ends[1]) + "\t" + toString(lengths[1]) + "\t" + toString(ambigs[1]) + "\t" + toString(homops[1]) + "\t" + toString(ptile0_25)); m->mothurOutEndLine(); m->mothurOut("25%-tile:\t" + toString(starts[2]) + "\t" + toString(ends[2]) + "\t" + toString(lengths[2]) + "\t" + toString(ambigs[2]) + "\t" + toString(homops[2]) + "\t" + toString(ptile25)); m->mothurOutEndLine(); m->mothurOut("Median: \t" + toString(starts[3]) + "\t" + toString(ends[3]) + "\t" + toString(lengths[3]) + "\t" + toString(ambigs[3]) + "\t" + toString(homops[3]) + "\t" + toString(ptile50)); m->mothurOutEndLine(); m->mothurOut("75%-tile:\t" + toString(starts[4]) + "\t" + toString(ends[4]) + "\t" + toString(lengths[4]) + "\t" + toString(ambigs[4]) + "\t" + toString(homops[4]) + "\t" + toString(ptile75)); m->mothurOutEndLine(); m->mothurOut("97.5%-tile:\t" + toString(starts[5]) + "\t" + toString(ends[5]) + "\t" + toString(lengths[5]) + "\t" + toString(ambigs[5]) + "\t" + toString(homops[5]) + "\t" + toString(ptile97_5)); m->mothurOutEndLine(); m->mothurOut("Maximum:\t" + toString(starts[6]) + "\t" + toString(ends[6]) + "\t" + toString(lengths[6]) + "\t" + toString(ambigs[6]) + "\t" + toString(homops[6]) + "\t" + toString(ptile100)); m->mothurOutEndLine(); m->mothurOut("Mean:\t" + toString(meanstartPosition) + "\t" + toString(meanendPosition) + "\t" + toString(meanseqLength) + "\t" + toString(meanambigBases) + "\t" + toString(meanlongHomoPolymer)); m->mothurOutEndLine(); if ((namefile == "") && (countfile == "")) { m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); } else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); } if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(summaryFile); m->mothurOutEndLine(); outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile); m->mothurOutEndLine(); if ((namefile == "") && (countfile == "")) { m->mothurOut("It took " + toString(time(NULL) - start) + " secs to summarize " + toString(numSeqs) + " sequences.\n"); } else{ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to summarize " + toString(size) + " sequences.\n"); } //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("summary"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSummaryFile(current); } } return 0; } catch(exception& e) { m->errorOut(e, "SeqSummaryCommand", "execute"); exit(1); } }