/**
 * Builds the per-group sequence and count maps from a count file and a fasta file.
 *
 * The count file is loaded into a local CountTable. An empty entry is created in
 * `seqs` and `countTablePerGroup` for every group the table reports, then the
 * fasta file is streamed once: each named sequence is recorded in `allSeqsMap`
 * and appended to every group in which its count is non-zero.
 *
 * @param countfile  path handed to CountTable::readTable
 * @param fastafile  path of the fasta file to parse
 *
 * Any std::exception is reported through MothurOut::errorOut and ends the program.
 */
SequenceCountParser::SequenceCountParser(string countfile, string fastafile) {
    try {
        m = MothurOut::getInstance();

        //read count file
        CountTable countTable;
        countTable.readTable(countfile, true, false);

        //initialize maps: one empty container per group
        namesOfGroups = countTable.getNamesOfGroups();
        for (size_t i = 0; i < namesOfGroups.size(); i++) {
            vector<Sequence> temp;
            map<string, int> tempMap;
            seqs[namesOfGroups[i]] = temp;
            countTablePerGroup[namesOfGroups[i]] = tempMap;
        }

        //read fasta file, distributing each sequence to the groups that contain it
        ifstream in;
        m->openInputFile(fastafile, in);

        int fastaCount = 0;
        while (!in.eof()) {

            if (m->control_pressed) { break; }

            Sequence seq(in); m->gobble(in);
            fastaCount++;
            if (m->debug && (fastaCount % 1000 == 0)) {
                m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + ".\n");
            }

            const string seqName = seq.getName();
            if (seqName == "") { continue; } //nothing usable was read

            allSeqsMap[seqName] = seqName;
            vector<int> groupCounts = countTable.getGroupCounts(seqName);

            //never index past the counts actually returned for this sequence
            for (size_t i = 0; (i < namesOfGroups.size()) && (i < groupCounts.size()); i++) {
                if (groupCounts[i] != 0) {
                    seqs[namesOfGroups[i]].push_back(seq);
                    countTablePerGroup[namesOfGroups[i]][seqName] = groupCounts[i];
                }
            }
        }
        in.close();
    }
    catch(exception& e) {
        m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
        exit(1);
    }
}
/**
 * Builds the per-group sequence and count maps from a fasta file and an
 * already-loaded count table.
 *
 * If the table has no group information, an error is printed,
 * `m->control_pressed` is set, and nothing is parsed. Otherwise an empty entry
 * is created in `seqs` and `countTablePerGroup` for every group, and the fasta
 * file is streamed once: each named sequence is recorded in `allSeqsMap` and
 * appended to every group in which its count is non-zero.
 *
 * @param fastafile   path of the fasta file to parse
 * @param countTable  count table queried for group names and per-group counts
 *
 * Any std::exception is reported through MothurOut::errorOut and ends the program.
 */
SequenceCountParser::SequenceCountParser(string fastafile, CountTable& countTable) {
    try {
        m = MothurOut::getInstance();

        if (!countTable.hasGroupInfo()) {
            m->control_pressed = true;
            m->mothurOut("[ERROR]: cannot parse fasta file by group with a count table that does not include group data, please correct.\n");
            return;
        }

        //initialize maps: one empty container per group
        namesOfGroups = countTable.getNamesOfGroups();
        for (size_t i = 0; i < namesOfGroups.size(); i++) {
            vector<Sequence> temp;
            map<string, int> tempMap;
            seqs[namesOfGroups[i]] = temp;
            countTablePerGroup[namesOfGroups[i]] = tempMap;
        }

        //read fasta file, distributing each sequence to the groups that contain it
        ifstream in;
        m->openInputFile(fastafile, in);

        int fastaCount = 0;
        while (!in.eof()) {

            if (m->control_pressed) { break; }

            Sequence seq(in); m->gobble(in);
            fastaCount++;
            if (m->debug && (fastaCount % 1000 == 0)) {
                m->mothurOut("[DEBUG]: reading seq " + toString(fastaCount) + ".\n");
            }

            const string seqName = seq.getName();
            if (seqName == "") { continue; } //nothing usable was read

            allSeqsMap[seqName] = seqName;
            vector<int> groupCounts = countTable.getGroupCounts(seqName);

            //never index past the counts actually returned for this sequence
            for (size_t i = 0; (i < namesOfGroups.size()) && (i < groupCounts.size()); i++) {
                if (groupCounts[i] != 0) {
                    seqs[namesOfGroups[i]].push_back(seq);
                    countTablePerGroup[namesOfGroups[i]][seqName] = groupCounts[i];
                }
            }
        }
        in.close();
    }
    catch(exception& e) {
        m->errorOut(e, "SequenceCountParser", "SequenceCountParser");
        exit(1);
    }
}
//********************************************************************************************************************** int SplitGroupCommand::runCount(){ try { CountTable ct; ct.readTable(countfile, true, false); if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, cannot split by group.\n"); m->control_pressed = true; } if (m->control_pressed) { return 0; } vector<string> namesGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, namesGroups); //fill filehandles with neccessary ofstreams map<string, string> ffiles; //group -> filename map<string, string> cfiles; //group -> filename for (int i=0; i<Groups.size(); i++) { ofstream ftemp, ctemp; map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile)); variables["[group]"] = Groups[i]; string newFasta = getOutputFileName("fasta",variables); outputNames.push_back(newFasta); outputTypes["fasta"].push_back(newFasta); ffiles[Groups[i]] = newFasta; m->openOutputFile(newFasta, ftemp); ftemp.close(); variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile)); string newCount = getOutputFileName("count",variables); outputNames.push_back(newCount); outputTypes["count"].push_back(newCount); cfiles[Groups[i]] = newCount; m->openOutputFile(newCount, ctemp); ctemp << "Representative_Sequence\ttotal\t" << Groups[i] << endl; ctemp.close(); } ifstream in; m->openInputFile(fastafile, in); while (!in.eof()) { Sequence seq(in); m->gobble(in); if (m->control_pressed) { break; } if (seq.getName() != "") { vector<string> thisSeqsGroups = ct.getGroups(seq.getName()); for (int i = 0; i < thisSeqsGroups.size(); i++) { if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //if this sequence belongs to a group we want them print ofstream outf, outc; m->openOutputFileAppend(ffiles[thisSeqsGroups[i]], outf); seq.printSequence(outf); outf.close(); int numSeqs = ct.getGroupCount(seq.getName(), thisSeqsGroups[i]); 
m->openOutputFileAppend(cfiles[thisSeqsGroups[i]], outc); outc << seq.getName() << '\t' << numSeqs << '\t' << numSeqs << endl; outc.close(); } } } } in.close(); return 0; } catch(exception& e) { m->errorOut(e, "SplitGroupCommand", "runCount"); exit(1); } }
/**
 * Merges the groups of the count file into the treatments named by the design map
 * and writes the merged count table.
 *
 * Steps:
 *   1. verify the count file has group information and read it;
 *   2. when the count table and the design map list the same number of groups,
 *      require every count-table group to be present in the design map;
 *   3. drop groups the user did not select;
 *   4. for each sequence, bucket its per-group counts by treatment, reduce every
 *      bucket with mergeAbund(), and add the sequence to the new table unless
 *      its merged total is 0;
 *   5. run remove.seqs on the fasta file for sequences whose merged total is 0;
 *   6. print the new table and register it as a "count" output.
 *
 * @param designMap  design map supplying the treatment of each group
 * @return always 0; failures are signalled through m->setControl_pressed(true).
 */
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) {
            m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n");
            m->setControl_pressed(true);
            return 0;
        }

        //read countTable
        countTable.readTable(countfile, true, false);

        //fill Groups - checks for "all" and for any typo groups
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }

        vector<string> dnamesGroups = designMap->getNamesGroups();

        //sanity check
        //NOTE(review): nothing is verified when the two group lists differ in size - confirm that is intended
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
        }

        if (error) {
            m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n");
            m->setControl_pressed(true);
            return 0;
        }

        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }

        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();

        //create new table with one group per treatment
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap;
        for (size_t i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        treatments = newTable.getNamesOfGroups();

        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (size_t i = 0; i < namesOfSeqs.size(); i++) {

            if (m->getControl_pressed()) { break; }

            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;

            //bucket this sequence's group counts by the treatment of each group
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }

            //create new counts for seq for new table
            vector<int> newCounts;
            int totalAbund = 0;
            for (size_t j = 0; j < treatments.size(); j++){
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund); //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }

            //add seq to new table
            if (totalAbund == 0) { namesToRemove.insert(namesOfSeqs[i]); }
            else                 { newTable.push_back(namesOfSeqs[i], newCounts); }
        }

        //an interrupted merge leaves newTable incomplete - do not post-process or print it
        if (m->getControl_pressed()) { return 0; }

        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);

            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->getControl_pressed()) { out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;

            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);

            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();
            delete removeCommand;

            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();

            util.mothurRemove(accnosFile);
        }

        string thisOutputDir = outputDir;
        if (outputDir == "") { thisOutputDir += util.hasPath(countfile); }

        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName);
        outputNames.push_back(outputFileName);

        newTable.printTable(outputFileName);

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
int RemoveGroupsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } //get groups you want to remove if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); } if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all vector<string> namesGroups = groupMap->getNamesOfGroups(); vector<string> checkedGroups; for (int i = 0; i < Groups.size(); i++) { if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); } else { m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); } } if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; } else { Groups = checkedGroups; m->setGroups(Groups); } //fill names with names of sequences that are from the groups we want to remove fillNames(); delete groupMap; }else if (countfile != ""){ if ((fastafile != "") || (listfile != "") || (taxfile != "")) { m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n"); } CountTable ct; ct.readTable(countfile, true, false); if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; } vector<string> gNamesOfGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, gNamesOfGroups); vector<string> namesOfSeqs = ct.getNamesOfSeqs(); sort(Groups.begin(), Groups.end()); for (int i = 0; i < namesOfSeqs.size(); i++) { vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]); if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you names.insert(namesOfSeqs[i]); } } } if (m->control_pressed) { return 0; } //read through the correct file and output lines you want to keep if 
(namefile != "") { readName(); } if (fastafile != "") { readFasta(); } if (groupfile != "") { readGroup(); } if (countfile != "") { readCount(); } if (listfile != "") { readList(); } if (taxfile != "") { readTax(); } if (sharedfile != "") { readShared(); } if (designfile != "") { readDesign(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (outputNames.size() != 0) { m->mothurOutEndLine(); m->mothurOut("Output File names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } itTypes = outputTypes.find("list"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); } } itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } itTypes = outputTypes.find("shared"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); } } itTypes = outputTypes.find("design"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) 
{ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } } } return 0; } catch(exception& e) { m->errorOut(e, "RemoveGroupsCommand", "execute"); exit(1); } }
/**
 * Computes parsimony scores for every pairwise combination of the selected
 * groups, plus one combination containing all groups.
 *
 * The combinations are split into `p` contiguous ranges stored in `lines`
 * (one per processor) and handed to createProcesses().
 *
 * @param t  tree to score; also supplies the count table
 * @param p  number of processors to divide the comparisons among
 * @param o  output directory, stored in `outputDir`
 * @return the result of createProcesses() (also stored in `data`)
 */
EstOutput Parsimony::getValues(Tree* t, int p, string o) {
    try {
        processors = p;
        outputDir = o;
        CountTable* ct = t->getCountTable();

        //if the users enters no groups then give them the score of all groups
        vector<string> mGroups = m->getGroups();
        int numGroups = mGroups.size();

        //calculate number of comparsions
        int numComp = 0;
        vector< vector<string> > namesOfGroupCombos;
        for (int r = 0; r < numGroups; r++) {
            for (int l = 0; l < r; l++) {
                numComp++;
                vector<string> groups;
                groups.push_back(mGroups[r]);
                groups.push_back(mGroups[l]);
                namesOfGroupCombos.push_back(groups);
            }
        }

        //numComp+1 for AB, AC, BC, ABC
        if (numComp != 1) {
            vector<string> groups;
            if (numGroups == 0) {
                //get score for all users groups
                vector<string> tGroups = ct->getNamesOfGroups();
                for (size_t i = 0; i < tGroups.size(); i++) {
                    if (tGroups[i] != "xxx") { groups.push_back(tGroups[i]); }
                }
            } else {
                for (size_t i = 0; i < mGroups.size(); i++) { groups.push_back(mGroups[i]); }
            }
            namesOfGroupCombos.push_back(groups);
        }

        //divide the combinations among the processors
        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) {
                //integer ceiling; ceil(a / b) on two ints truncates before ceil ever runs
                numPairs = (remainingPairs + remainingProcessors - 1) / remainingProcessors;
            }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Parsimony", "getValues");
        exit(1);
    }
}
//********************************************************************************************************************** int SharedCommand::createSharedFromListGroup() { try { GroupMap* groupMap = NULL; CountTable* countTable = NULL; pickedGroups = false; if (groupfile != "") { groupMap = new GroupMap(groupfile); int groupError = groupMap->readMap(); if (groupError == 1) { delete groupMap; return 0; } vector<string> allGroups = groupMap->getNamesOfGroups(); if (Groups.size() == 0) { Groups = allGroups; } else { pickedGroups = true; } }else{ countTable = new CountTable(); countTable->readTable(countfile, true, false); vector<string> allGroups = countTable->getNamesOfGroups(); if (Groups.size() == 0) { Groups = allGroups; } else { pickedGroups = true; } } int numGroups = Groups.size(); if (m->getControl_pressed()) { return 0; } ofstream out; string filename = ""; if (!pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); util.openOutputFile(filename, out); } //set fileroot fileroot = outputDir + util.getRootName(util.getSimpleName(listfile)); map<string, string> variables; variables["[filename]"] = fileroot; string errorOff = "no error"; InputData input(listfile, "shared", Groups); SharedListVector* SharedList = input.getSharedListVector(); string lastLabel = SharedList->getLabel(); SharedRAbundVectors* lookup; if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } out.close(); if (!pickedGroups) { util.mothurRemove(filename); } return 0; } //sanity check vector<string> namesSeqs; int numGroupNames = 0; if (current->getGroupMode() == "group") { namesSeqs = groupMap->getNamesSeqs(); 
numGroupNames = groupMap->getNumSeqs(); } else { namesSeqs = countTable->getNamesOfSeqs(); numGroupNames = countTable->getNumUniqueSeqs(); } int error = ListGroupSameSeqs(namesSeqs, SharedList); if ((!pickedGroups) && (SharedList->getNumSeqs() != numGroupNames)) { //if the user has not specified any groups and their files don't match exit with error m->mothurOut("Your group file contains " + toString(numGroupNames) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct.\n"); m->setControl_pressed(true); out.close(); if (!pickedGroups) { util.mothurRemove(filename); } //remove blank shared file you made //delete memory delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } return 0; } if (error == 1) { m->setControl_pressed(true); } //if user has specified groups make new groupfile for them if ((pickedGroups) && (current->getGroupMode() == "group")) { //make new group file string groups = ""; if (numGroups < 4) { for (int i = 0; i < numGroups-1; i++) { groups += Groups[i] + "."; } groups+=Groups[numGroups-1]; }else { groups = "merge"; } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(listfile)); variables["[group]"] = groups; string newGroupFile = getOutputFileName("group",variables); outputTypes["group"].push_back(newGroupFile); outputNames.push_back(newGroupFile); ofstream outGroups; util.openOutputFile(newGroupFile, outGroups); vector<string> names = groupMap->getNamesSeqs(); string groupName; for (int i = 0; i < names.size(); i++) { groupName = groupMap->getGroup(names[i]); if (isValidGroup(groupName, Groups)) { outGroups << names[i] << '\t' << groupName << endl; } } outGroups.close(); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. 
set<string> processedLabels; set<string> userLabels = labels; bool printHeaders = true; while((SharedList != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } if(allLines == 1 || labels.count(SharedList->getLabel()) == 1){ lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } delete lookup; if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; processedLabels.insert(SharedList->getLabel()); userLabels.erase(SharedList->getLabel()); } if ((util.anyLabelsToProcess(SharedList->getLabel(), userLabels, errorOff) ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = SharedList->getLabel(); delete SharedList; SharedList = input.getSharedListVector(lastLabel); //get new list vector to process lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { delete SharedList; if (groupMap != NULL) { delete groupMap; 
} if (countTable != NULL) { delete countTable; } delete lookup; if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; processedLabels.insert(SharedList->getLabel()); userLabels.erase(SharedList->getLabel()); //restore real lastlabel to save below SharedList->setLabel(saveLabel); } lastLabel = SharedList->getLabel(); delete SharedList; SharedList = input.getSharedListVector(); //get new list vector to process } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { if (processedLabels.count(lastLabel) != 1) { needToRun = true; } } //run last label if you need to if (needToRun ) { if (SharedList != NULL) { delete SharedList; } SharedList = input.getSharedListVector(lastLabel); //get new list vector to process lookup = SharedList->getSharedRAbundVector(); m->mothurOut(lookup->getLabel()+"\n"); if (m->getControl_pressed()) { if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } if (!pickedGroups) { out.close(); util.mothurRemove(filename); } return 0; } //if picked groups must split the shared file by label if (pickedGroups) { string filename = listfile; if (outputDir == "") { outputDir += util.hasPath(filename); } 
map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(filename)); variables["[distance]"] = lookup->getLabel(); filename = getOutputFileName("shared",variables); outputNames.push_back(filename); outputTypes["shared"].push_back(filename); ofstream out2; util.openOutputFile(filename, out2); lookup->eliminateZeroOTUS(); printSharedData(lookup, out2, printHeaders); out2.close(); }else { printSharedData(lookup, out, printHeaders); //prints info to the .shared file } delete lookup; delete SharedList; } if (!pickedGroups) { out.close(); } if (groupMap != NULL) { delete groupMap; } if (countTable != NULL) { delete countTable; } if (m->getControl_pressed()) { if (!pickedGroups) { util.mothurRemove(filename); } return 0; } return 0; } catch(exception& e) { m->errorOut(e, "SharedCommand", "createSharedFromListGroup"); exit(1); } }
int TreeGroupCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } if (format == "sharedfile") { InputData input(sharedfile, "sharedfile", Groups); SharedRAbundVectors* lookup = input.getSharedRAbundVectors(); lastLabel = lookup->getLabel(); Groups = lookup->getNamesGroups(); if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command.\n"); return 0; } //create treemap class from groupmap for tree class to use CountTable ct; set<string> nameMap; map<string, string> groupMap; set<string> gps; for (int i = 0; i < Groups.size(); i++) { nameMap.insert(Groups[i]); gps.insert(Groups[i]); groupMap[Groups[i]] = Groups[i]; } ct.createTable(nameMap, groupMap, gps); //fills tree names with shared files groups Treenames = lookup->getNamesGroups(); if (m->getControl_pressed()) { return 0; } //create tree file makeSimsShared(input, lookup, ct); if (m->getControl_pressed()) { for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } return 0; } }else{ //read in dist file filename = inputfile; ReadMatrix* readMatrix; if (format == "column") { readMatrix = new ReadColumnMatrix(filename); } else if (format == "phylip") { readMatrix = new ReadPhylipMatrix(filename); } readMatrix->setCutoff(cutoff); ListVector* list; if(namefile != ""){ NameAssignment* nameMap = new NameAssignment(namefile); nameMap->readMap(); readMatrix->read(nameMap); list = readMatrix->getListVector(); delete nameMap; }else if (countfile != "") { CountTable* ct = new CountTable(); ct->readTable(countfile, true, false); readMatrix->read(ct); list = readMatrix->getListVector(); delete ct; }else { NameAssignment* nameMap = NULL; readMatrix->read(nameMap); list = readMatrix->getListVector(); } SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix(); Treenames.clear(); //make treemap CountTable ct; set<string> nameMap; map<string, string> groupMap; set<string> gps; for (int i = 0; i < list->getNumBins(); i++) { string 
bin = list->get(i); nameMap.insert(bin); gps.insert(bin); groupMap[bin] = bin; Treenames.push_back(bin); } ct.createTable(nameMap, groupMap, gps); vector<string> namesGroups = ct.getNamesOfGroups(); if (m->getControl_pressed()) { return 0; } vector< vector<double> > matrix = makeSimsDist(dMatrix, list->getNumBins()); delete readMatrix; delete dMatrix; if (m->getControl_pressed()) { return 0; } //create a new filename map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(inputfile)); string outputFile = getOutputFileName("tree",variables); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); Tree* newTree = new Tree(&ct, matrix, Treenames); if (m->getControl_pressed()) { delete newTree; newTree = NULL; } else { newTree->assembleTree(); } if (newTree != NULL) { newTree->createNewickFile(outputFile); delete newTree; } if (m->getControl_pressed()) { return 0; } m->mothurOut("Tree complete.\n"); } //set tree file as new current treefile string currentName = ""; itTypes = outputTypes.find("tree"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTreeFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "TreeGroupCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int RemoveRareCommand::processList(){ try { //you must provide a label because the names in the listfile need to be consistent string thisLabel = ""; if (allLines) { m->mothurOut("For the listfile you must select one label, using first label in your listfile."); m->mothurOutEndLine(); } else if (labels.size() > 1) { m->mothurOut("For the listfile you must select one label, using " + (*labels.begin()) + "."); m->mothurOutEndLine(); thisLabel = *labels.begin(); } else { thisLabel = *labels.begin(); } InputData input(listfile, "list"); ListVector* list = input.getListVector(); //get first one or the one we want if (thisLabel != "") { //use smart distancing set<string> userLabels; userLabels.insert(thisLabel); set<string> processedLabels; string lastLabel = list->getLabel(); while((list != NULL) && (userLabels.size() != 0)) { if(userLabels.count(list->getLabel()) == 1){ processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); break; } if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { processedLabels.insert(list->getLabel()); userLabels.erase(list->getLabel()); delete list; list = input.getListVector(lastLabel); break; } lastLabel = list->getLabel(); delete list; list = input.getListVector(); } if (userLabels.size() != 0) { m->mothurOut("Your file does not include the label " + thisLabel + ". 
I will use " + lastLabel + "."); m->mothurOutEndLine(); list = input.getListVector(lastLabel); } } string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(listfile); } map<string, string> variables; variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile)); variables["[extension]"] = m->getExtension(listfile); variables["[tag]"] = list->getLabel(); string outputFileName = getOutputFileName("list", variables); variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); variables["[extension]"] = m->getExtension(groupfile); string outputGroupFileName = getOutputFileName("group", variables); variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); variables["[extension]"] = m->getExtension(countfile); string outputCountFileName = getOutputFileName("count", variables); ofstream out, outGroup; m->openOutputFile(outputFileName, out); bool wroteSomething = false; //if groupfile is given then use it GroupMap* groupMap; CountTable ct; if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); SharedUtil util; vector<string> namesGroups = groupMap->getNamesOfGroups(); util.setGroups(Groups, namesGroups); m->openOutputFile(outputGroupFileName, outGroup); }else if (countfile != "") { ct.readTable(countfile, true, false); if (ct.hasGroupInfo()) { vector<string> namesGroups = ct.getNamesOfGroups(); SharedUtil util; util.setGroups(Groups, namesGroups); } } if (list != NULL) { vector<string> binLabels = list->getLabels(); vector<string> newLabels; //make a new list vector ListVector newList; newList.setLabel(list->getLabel()); //for each bin for (int i = 0; i < list->getNumBins(); i++) { if (m->control_pressed) { if (groupfile != "") { delete groupMap; outGroup.close(); m->mothurRemove(outputGroupFileName); } out.close(); m->mothurRemove(outputFileName); return 0; } //parse out names that are in accnos file string binnames = list->get(i); 
vector<string> names; string saveBinNames = binnames; m->splitAtComma(binnames, names); int binsize = names.size(); vector<string> newGroupFile; if (groupfile != "") { vector<string> newNames; saveBinNames = ""; for(int k = 0; k < names.size(); k++) { string group = groupMap->getGroup(names[k]); if (m->inUsersGroups(group, Groups)) { newGroupFile.push_back(names[k] + "\t" + group); newNames.push_back(names[k]); saveBinNames += names[k] + ","; } } names = newNames; binsize = names.size(); saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); }else if (countfile != "") { saveBinNames = ""; binsize = 0; for(int k = 0; k < names.size(); k++) { if (ct.hasGroupInfo()) { vector<string> thisSeqsGroups = ct.getGroups(names[k]); int thisSeqsCount = 0; for (int n = 0; n < thisSeqsGroups.size(); n++) { if (m->inUsersGroups(thisSeqsGroups[n], Groups)) { thisSeqsCount += ct.getGroupCount(names[k], thisSeqsGroups[n]); } } binsize += thisSeqsCount; //if you don't have any seqs from the groups the user wants, then remove you. 
if (thisSeqsCount == 0) { newGroupFile.push_back(names[k]); } else { saveBinNames += names[k] + ","; } }else { binsize += ct.getNumSeqs(names[k]); saveBinNames += names[k] + ","; } } saveBinNames = saveBinNames.substr(0, saveBinNames.length()-1); } if (binsize > nseqs) { //keep bin newList.push_back(saveBinNames); newLabels.push_back(binLabels[i]); if (groupfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { outGroup << newGroupFile[k] << endl; } } else if (countfile != "") { for(int k = 0; k < newGroupFile.size(); k++) { ct.remove(newGroupFile[k]); } } }else { if (countfile != "") { for(int k = 0; k < names.size(); k++) { ct.remove(names[k]); } } } } //print new listvector if (newList.getNumBins() != 0) { wroteSomething = true; newList.setLabels(newLabels); newList.printHeaders(out); newList.print(out); } } out.close(); if (groupfile != "") { outGroup.close(); outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName); } if (countfile != "") { if (ct.hasGroupInfo()) { vector<string> allGroups = ct.getNamesOfGroups(); for (int i = 0; i < allGroups.size(); i++) { if (!m->inUsersGroups(allGroups[i], Groups)) { ct.removeGroup(allGroups[i]); } } } ct.printTable(outputCountFileName); outputTypes["count"].push_back(outputCountFileName); outputNames.push_back(outputCountFileName); } if (wroteSomething == false) { m->mothurOut("Your file contains only rare sequences."); m->mothurOutEndLine(); } outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName); return 0; } catch(exception& e) { m->errorOut(e, "RemoveRareCommand", "processList"); exit(1); } }
/**
 * Computes unweighted scores for every pairwise combination of the selected
 * groups, plus one combination containing all groups.
 *
 * The combinations are split into `p` contiguous ranges stored in `lines`
 * (one per processor) and handed to createProcesses(); `lines` is cleared
 * again before returning.
 *
 * @param t  tree to score; also supplies the count table
 * @param p  number of processors to divide the comparisons among
 * @param o  output directory, stored in `outputDir`
 * @return the result of createProcesses() (also stored in `data`)
 */
EstOutput Unweighted::getValues(Tree* t, int p, string o) {
    try {
        processors = p;
        outputDir = o;

        CountTable* ct = t->getCountTable();

        //if the users enters no groups then give them the score of all groups
        int numGroups = m->getNumGroups();
        vector<string> mGroups = m->getGroups(); //fetched once instead of on every loop iteration

        //calculate number of comparsions
        int numComp = 0;
        vector< vector<string> > namesOfGroupCombos;
        for (int r = 0; r < numGroups; r++) {
            for (int l = 0; l < r; l++) {
                numComp++;
                vector<string> groups;
                groups.push_back(mGroups[r]);
                groups.push_back(mGroups[l]);
                namesOfGroupCombos.push_back(groups);
            }
        }

        if (numComp != 1) {
            vector<string> groups;
            if (numGroups == 0) {
                //get score for all users groups
                vector<string> tGroups = ct->getNamesOfGroups();
                for (size_t i = 0; i < tGroups.size(); i++) {
                    if (tGroups[i] != "xxx") { groups.push_back(tGroups[i]); }
                }
            } else {
                for (int i = 0; i < numGroups; i++) { groups.push_back(mGroups[i]); }
            }
            namesOfGroupCombos.push_back(groups);
        }

        //divide the combinations among the processors
        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) {
                //integer ceiling; ceil(a / b) on two ints truncates before ceil ever runs
                numPairs = (remainingPairs + remainingProcessors - 1) / remainingProcessors;
            }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        lines.clear();

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Unweighted", "getValues");
        exit(1);
    }
}