int DeconvoluteCommand::execute() { try { if (abort) { if (calledHelp) { return 0; } return 2; } //prepare filenames and open files map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile)); string outNameFile = getOutputFileName("name", variables); string outCountFile = getOutputFileName("count", variables); variables["[extension]"] = util.getExtension(fastafile); string outFastaFile = getOutputFileName("fasta", variables); map<string, string> nameMap; map<string, string>::iterator itNames; if (namefile != "") { util.readNames(namefile, nameMap); if (namefile == outNameFile){ //prepare filenames and open files map<string, string> mvariables; mvariables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile)); mvariables["[tag]"] = "unique"; outNameFile = getOutputFileName("name", mvariables); } } CountTable ct; if (countfile != "") { ct.readTable(countfile, true, false); if (countfile == outCountFile){ //prepare filenames and open files map<string, string> mvariables; mvariables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile)); mvariables["[tag]"] = "unique"; outCountFile = getOutputFileName("count", mvariables); } } if (m->getControl_pressed()) { return 0; } ifstream in; util.openInputFile(fastafile, in); ofstream outFasta; util.openOutputFile(outFastaFile, outFasta); map<string, string> sequenceStrings; //sequenceString -> list of names. "atgc...." -> seq1,seq2,seq3. 
map<string, string>::iterator itStrings; set<string> nameInFastaFile; //for sanity checking set<string>::iterator itname; vector<string> nameFileOrder; CountTable newCt; int count = 0; while (!in.eof()) { if (m->getControl_pressed()) { in.close(); outFasta.close(); util.mothurRemove(outFastaFile); return 0; } Sequence seq(in); if (seq.getName() != "") { //sanity checks itname = nameInFastaFile.find(seq.getName()); if (itname == nameInFastaFile.end()) { nameInFastaFile.insert(seq.getName()); } else { m->mothurOut("[ERROR]: You already have a sequence named " + seq.getName() + " in your fasta file, sequence names must be unique, please correct."); m->mothurOutEndLine(); } itStrings = sequenceStrings.find(seq.getAligned()); if (itStrings == sequenceStrings.end()) { //this is a new unique sequence //output to unique fasta file seq.printSequence(outFasta); if (namefile != "") { itNames = nameMap.find(seq.getName()); if (itNames == nameMap.end()) { //namefile and fastafile do not match m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine(); }else { if (format == "name") { sequenceStrings[seq.getAligned()] = itNames->second; nameFileOrder.push_back(seq.getAligned()); }else { newCt.push_back(seq.getName(), util.getNumNames(itNames->second)); sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned()); } } }else if (countfile != "") { if (format == "name") { int numSeqs = ct.getNumSeqs(seq.getName()); string expandedName = seq.getName()+"_0"; for (int i = 1; i < numSeqs; i++) { expandedName += "," + seq.getName() + "_" + toString(i); } sequenceStrings[seq.getAligned()] = expandedName; nameFileOrder.push_back(seq.getAligned()); }else { ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned()); } }else { if (format == "name") { sequenceStrings[seq.getAligned()] = 
seq.getName(); nameFileOrder.push_back(seq.getAligned()); } else { newCt.push_back(seq.getName()); sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned()); } } }else { //this is a dup if (namefile != "") { itNames = nameMap.find(seq.getName()); if (itNames == nameMap.end()) { //namefile and fastafile do not match m->mothurOut("[ERROR]: " + seq.getName() + " is in your fasta file, and not in your namefile, please correct."); m->mothurOutEndLine(); }else { if (format == "name") { sequenceStrings[seq.getAligned()] += "," + itNames->second; } else { int currentReps = newCt.getNumSeqs(itStrings->second); newCt.setNumSeqs(itStrings->second, currentReps+(util.getNumNames(itNames->second))); } } }else if (countfile != "") { if (format == "name") { int numSeqs = ct.getNumSeqs(seq.getName()); string expandedName = seq.getName()+"_0"; for (int i = 1; i < numSeqs; i++) { expandedName += "," + seq.getName() + "_" + toString(i); } sequenceStrings[seq.getAligned()] += "," + expandedName; }else { int num = ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table if (num != 0) { //its in the table ct.mergeCounts(itStrings->second, seq.getName()); //merges counts and saves in uniques name } } }else { if (format == "name") { sequenceStrings[seq.getAligned()] += "," + seq.getName(); } else { int currentReps = newCt.getNumSeqs(itStrings->second); newCt.setNumSeqs(itStrings->second, currentReps+1); } } } count++; } util.gobble(in); if(count % 1000 == 0) { m->mothurOutJustToScreen(toString(count) + "\t" + toString(sequenceStrings.size()) + "\n"); } } if(count % 1000 != 0) { m->mothurOut(toString(count) + "\t" + toString(sequenceStrings.size())); m->mothurOutEndLine(); } in.close(); outFasta.close(); if (m->getControl_pressed()) { util.mothurRemove(outFastaFile); return 0; } //print new names file ofstream outNames; if (format == "name") { util.openOutputFile(outNameFile, outNames); outputNames.push_back(outNameFile); 
outputTypes["name"].push_back(outNameFile); } else { util.openOutputFile(outCountFile, outNames); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile); } if ((countfile != "") && (format == "count")) { ct.printHeaders(outNames); } else if ((countfile == "") && (format == "count")) { newCt.printHeaders(outNames); } for (int i = 0; i < nameFileOrder.size(); i++) { if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outFastaFile); outNames.close(); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } itStrings = sequenceStrings.find(nameFileOrder[i]); if (itStrings != sequenceStrings.end()) { if (format == "name") { //get rep name int pos = (itStrings->second).find_first_of(','); if (pos == string::npos) { // only reps itself outNames << itStrings->second << '\t' << itStrings->second << endl; }else { outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl; } }else { if (countfile != "") { ct.printSeq(outNames, itStrings->second); } else if (format == "count") { newCt.printSeq(outNames, itStrings->second); } } }else{ m->mothurOut("[ERROR]: mismatch in namefile print."); m->mothurOutEndLine(); m->setControl_pressed(true); } } outNames.close(); if (m->getControl_pressed()) { outputTypes.clear(); util.mothurRemove(outFastaFile); for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } m->mothurOut("\nOutput File Names: \n"); outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } itTypes = outputTypes.find("name"); if (itTypes != 
outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setNameFile(currentName); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setCountFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "DeconvoluteCommand", "execute"); exit(1); } }
/*
 * Builds a new count table whose group columns are the design file's
 * categories instead of the original groups.
 *
 * Steps:
 *   1. Verify `countfile` carries group information and read it.
 *   2. Compare its groups against the design map's groups.
 *   3. Drop any groups the user did not select (`Groups`).
 *   4. For every sequence, gather its per-group counts by category and
 *      collapse each category with mergeAbund().
 *   5. Sequences whose merged total is 0 are left out of the new table and
 *      removed from `fastafile` by running remove.seqs on a temporary accnos file.
 *   6. Write the new table and register it as a "count" output.
 *
 * designMap: design map used to look up each group's category.
 *
 * Returns 0 in all cases. Failures and user interrupts are signalled through
 * m->setControl_pressed(true) / m->getControl_pressed(). Any std::exception is
 * reported through m->errorOut and the process exits with status 1.
 */
int MergeGroupsCommand::processCountFile(DesignMap*& designMap){
    try {
        CountTable countTable;
        if (!countTable.testGroups(countfile)) {
            m->mothurOut("[ERROR]: your countfile contains no group information, please correct.\n");
            m->setControl_pressed(true);
            return 0;
        }

        //read countTable
        countTable.readTable(countfile, true, false);

        //fill Groups - checks for "all" and for any typo groups
        vector<string> nameGroups = countTable.getNamesOfGroups();
        if (Groups.size() == 0) { Groups = nameGroups; }

        vector<string> dnamesGroups = designMap->getNamesGroups();

        //sanity check
        //NOTE(review): membership is only compared when both lists have the same size; a size
        //mismatch is not reported as an error here - confirm that is intended.
        bool error = false;
        if (nameGroups.size() == dnamesGroups.size()) { //at least there are the same number
            //is every group in counttable also in designmap
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (m->getControl_pressed()) { break; }
                if (!util.inUsersGroups(nameGroups[i], dnamesGroups)) { error = true; break; }
            }
        }

        if (error) {
            m->mothurOut("[ERROR]: Your countfile does not contain the same groups as your design file, please correct\n");
            m->setControl_pressed(true);
            return 0;
        }

        //user selected groups - remove some groups from table
        if (Groups.size() != nameGroups.size()) {
            for (size_t i = 0; i < nameGroups.size(); i++) {
                if (!util.inUsersGroups(nameGroups[i], Groups)) { countTable.removeGroup(nameGroups[i]); }
            }
        }

        //ask again in case order changed
        nameGroups = countTable.getNamesOfGroups();
        int numGroups = nameGroups.size();

        //create new table
        CountTable newTable;
        vector<string> treatments = designMap->getCategory();
        map<string, vector<int> > clearedMap; //category -> empty list, copied as the starting point for each sequence
        for (size_t i = 0; i < treatments.size(); i++) {
            newTable.addGroup(treatments[i]);
            vector<int> temp;
            clearedMap[treatments[i]] = temp;
        }
        //re-read so the column order matches the new table's own group order
        treatments = newTable.getNamesOfGroups();

        set<string> namesToRemove;
        vector<string> namesOfSeqs = countTable.getNamesOfSeqs();
        for (size_t i = 0; i < namesOfSeqs.size(); i++) {

            if (m->getControl_pressed()) { break; }

            vector<int> thisSeqsCounts = countTable.getGroupCounts(namesOfSeqs[i]);
            map<string, vector<int> > thisSeqsMap = clearedMap;

            //bucket this sequence's per-group counts under each group's category
            for (int j = 0; j < numGroups; j++) {
                thisSeqsMap[designMap->get(nameGroups[j])].push_back(thisSeqsCounts[j]);
            }

            //create new counts for seq for new table
            vector<int> newCounts;
            int totalAbund = 0;
            for (size_t j = 0; j < treatments.size(); j++) {
                int abund = mergeAbund(thisSeqsMap[treatments[j]]);
                newCounts.push_back(abund); //order matters, add in count for each treatment in new table.
                totalAbund += abund;
            }

            //add seq to new table
            if (totalAbund == 0) { namesToRemove.insert(namesOfSeqs[i]); }
            else { newTable.push_back(namesOfSeqs[i], newCounts); }
        }

        //the loop above stops early on user interrupt; do not run remove.seqs or write a
        //partially merged table in that case
        if (m->getControl_pressed()) { return 0; }

        //remove sequences zeroed out by median method
        if (namesToRemove.size() != 0) {
            //print names
            ofstream out;
            string accnosFile = "accnosFile.temp";
            util.openOutputFile(accnosFile, out);

            //output to .accnos file
            for (set<string>::iterator it = namesToRemove.begin(); it != namesToRemove.end(); it++) {
                if (m->getControl_pressed()) { out.close(); util.mothurRemove(accnosFile); return 0; }
                out << *it << endl;
            }
            out.close();

            //run remove.seqs
            string inputString = "accnos=" + accnosFile + ", fasta=" + fastafile;

            m->mothurOut("/******************************************/"); m->mothurOutEndLine();
            m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
            current->setMothurCalling(true);

            Command* removeCommand = new RemoveSeqsCommand(inputString);
            removeCommand->execute();

            //NOTE(review): the returned file list is not used below - confirm whether the
            //filtered fasta file should be recorded as an output of this command.
            map<string, vector<string> > filenames = removeCommand->getOutputFiles();

            delete removeCommand;
            current->setMothurCalling(false);
            m->mothurOut("/******************************************/"); m->mothurOutEndLine();

            util.mothurRemove(accnosFile);
        }

        //default the output location to the count file's own directory
        string thisOutputDir = outputDir;
        if (outputDir == "") { thisOutputDir += util.hasPath(countfile); }
        map<string, string> variables;
        variables["[filename]"] = thisOutputDir + util.getRootName(util.getSimpleName(countfile));
        variables["[extension]"] = util.getExtension(countfile);
        string outputFileName = getOutputFileName("count", variables);
        outputTypes["count"].push_back(outputFileName);
        outputNames.push_back(outputFileName);

        newTable.printTable(outputFileName);

        return 0;
    }
    catch(exception& e) {
        m->errorOut(e, "MergeGroupsCommand", "processCountFile");
        exit(1);
    }
}
//********************************************************************************************************************** SharedCommand::SharedCommand(string option) { try { abort = false; calledHelp = false; pickedGroups=false; allLines = 1; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } else if(option == "citation") { citation(); abort = true; calledHelp = true;} else { vector<string> myArray = setParameters(); OptionParser parser(option); map<string, string> parameters = parser.getParameters(); ValidParameters validParameter; map<string, string>::iterator it; //check to make sure all parameters are valid for command for (it = parameters.begin(); it != parameters.end(); it++) { if (!validParameter.isValidParameter(it->first, myArray, it->second)) { abort = true; } } //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.valid(parameters, "inputdir"); if (inputDir == "not found"){ inputDir = ""; } else { string path; it = parameters.find("list"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["list"] = inputDir + it->second; } } it = parameters.find("group"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["group"] = inputDir + it->second; } } it = parameters.find("count"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. 
if (path == "") { parameters["count"] = inputDir + it->second; } } it = parameters.find("biom"); //user has given a template file if(it != parameters.end()){ path = util.hasPath(it->second); //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["biom"] = inputDir + it->second; } } } vector<string> tempOutNames; outputTypes["shared"] = tempOutNames; outputTypes["group"] = tempOutNames; outputTypes["map"] = tempOutNames; //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.valid(parameters, "outputdir"); if (outputDir == "not found"){ outputDir = ""; } //check for required parameters listfile = validParameter.validFile(parameters, "list"); if (listfile == "not open") { listfile = ""; abort = true; } else if (listfile == "not found") { listfile = ""; } else { current->setListFile(listfile); } biomfile = validParameter.validFile(parameters, "biom"); if (biomfile == "not open") { biomfile = ""; abort = true; } else if (biomfile == "not found") { biomfile = ""; } else { current->setBiomFile(biomfile); } ordergroupfile = validParameter.validFile(parameters, "ordergroup"); if (ordergroupfile == "not open") { abort = true; } else if (ordergroupfile == "not found") { ordergroupfile = ""; } groupfile = validParameter.validFile(parameters, "group"); if (groupfile == "not open") { groupfile = ""; abort = true; } else if (groupfile == "not found") { groupfile = ""; } else { current->setGroupFile(groupfile); } countfile = validParameter.validFile(parameters, "count"); if (countfile == "not open") { countfile = ""; abort = true; } else if (countfile == "not found") { countfile = ""; } else { current->setCountFile(countfile); CountTable temp; if (!temp.testGroups(countfile)) { m->mothurOut("\n[WARNING]: Your count file does not have group info, all reads will be assigned to mothurGroup.\n"); temp.readTable(countfile, false, false); //dont read 
groups map<string, int> seqs = temp.getNameMap(); CountTable newCountTable; newCountTable.addGroup("mothurGroup"); for (map<string, int>::iterator it = seqs.begin(); it != seqs.end(); it++) { vector<int> counts; counts.push_back(it->second); newCountTable.push_back(it->first, counts); } string newCountfileName = util.getRootName(countfile) + "mothurGroup" + util.getExtension(countfile); newCountTable.printTable(newCountfileName); current->setCountFile(newCountfileName); countfile = newCountfileName; outputNames.push_back(newCountfileName); } } if ((biomfile == "") && (listfile == "") && (countfile == "")) { //you must provide at least one of the following //is there are current file available for either of these? //give priority to list, then biom, then count listfile = current->getListFile(); if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter.\n"); } else { biomfile = current->getBiomFile(); if (biomfile != "") { m->mothurOut("Using " + biomfile + " as input file for the biom parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: No valid current files. 
You must provide a list or biom or count file before you can use the make.shared command.\n"); abort = true; } } } } else if ((biomfile != "") && (listfile != "")) { m->mothurOut("When executing a make.shared command you must enter ONLY ONE of the following: list or biom.\n"); abort = true; } if (listfile != "") { if ((groupfile == "") && (countfile == "")) { groupfile = current->getGroupFile(); if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter.\n"); } else { countfile = current->getCountFile(); if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter.\n"); } else { m->mothurOut("[ERROR]: You need to provide a groupfile or countfile if you are going to use the list format.\n"); abort = true; } } } } string groups = validParameter.valid(parameters, "groups"); if (groups == "not found") { groups = ""; } else { pickedGroups=true; util.splitAtDash(groups, Groups); if (Groups.size() != 0) { if (Groups[0]== "all") { Groups.clear(); } } } //check for optional parameter and set defaults // ...at some point should added some additional type checking... string label = validParameter.valid(parameters, "label"); if (label == "not found") { label = ""; } else { if(label != "all") { util.splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } if ((listfile == "") && (biomfile == "") && (countfile != "")) { //building a shared file from a count file, require label if (labels.size() == 0) { m->mothurOut("[ERROR]: You must provide a label when converting a count file to a shared file, please correct.\n"); abort = true; } } } } catch(exception& e) { m->errorOut(e, "SharedCommand", "SharedCommand"); exit(1); } }