int ChimeraPerseusCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } //process each file for (int s = 0; s < fastaFileNames.size(); s++) { m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); int start = time(NULL); if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.chimera"; string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "perseus.accnos"; //string newFasta = m->getRootName(fastaFileNames[s]) + "temp"; //you provided a groupfile string groupFile = ""; if (groupFileNames.size() != 0) { groupFile = groupFileNames[s]; } string nameFile = ""; if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one nameFile = nameFileNames[s]; }else { nameFile = getNamesFile(fastaFileNames[s]); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } int numSeqs = 0; int numChimeras = 0; if (groupFile != "") { //Parse sequences by group SequenceParser parser(groupFile, fastaFileNames[s], nameFile); vector<string> groups = parser.getNamesOfGroups(); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } //clears files ofstream out, out1, out2; m->openOutputFile(outputFileName, out); out.close(); m->openOutputFile(accnosFileName, out1); out1.close(); if(processors == 1) { numSeqs = driverGroups(parser, outputFileName, accnosFileName, 0, groups.size(), groups); } else { numSeqs = createProcessesGroups(parser, outputFileName, accnosFileName, groups, groupFile, fastaFileNames[s], nameFile); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } numChimeras = deconvoluteResults(parser, outputFileName, accnosFileName); m->mothurOut("The number of sequences checked may be larger than the number of unique sequences because some sequences are found in several samples."); m->mothurOutEndLine(); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } }else{ if (processors != 1) { m->mothurOut("Without a groupfile, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } //read sequences and store sorted by frequency vector<seqData> sequences = readFiles(fastaFileNames[s], nameFile); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } numSeqs = driver(outputFileName, sequences, accnosFileName, numChimeras); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences. " + toString(numChimeras) + " chimeras were found."); m->mothurOutEndLine(); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); } //set accnos file as new current accnosfile string current = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraPerseusCommand", "execute"); exit(1); } }
int PreClusterCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } int start = time(NULL); if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, 1000); } else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000); } else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } else if(align == "noalign") { alignment = new NoAlign(); } else { m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); m->mothurOutEndLine(); alignment = new NeedlemanOverlap(gapOpen, match, misMatch, 1000); } string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile)); map<string, string> variables; variables["[filename]"] = fileroot; string newNamesFile = getOutputFileName("name",variables); string newCountFile = getOutputFileName("count",variables); string newMapFile = getOutputFileName("map",variables); //add group name if by group variables["[extension]"] = m->getExtension(fastafile); string newFastaFile = getOutputFileName("fasta", variables); outputNames.push_back(newFastaFile); outputTypes["fasta"].push_back(newFastaFile); if (countfile == "") { outputNames.push_back(newNamesFile); outputTypes["name"].push_back(newNamesFile); } else { outputNames.push_back(newCountFile); outputTypes["count"].push_back(newCountFile); } if (bygroup) { //clear out old files ofstream outFasta; m->openOutputFile(newFastaFile, outFasta); outFasta.close(); ofstream outNames; m->openOutputFile(newNamesFile, outNames); outNames.close(); newMapFile = fileroot + "precluster."; //parse fasta and name file by group vector<string> groups; if (countfile != "") { cparser = new SequenceCountParser(countfile, fastafile); groups = cparser->getNamesOfGroups(); }else { if (namefile != "") { parser = new SequenceParser(groupfile, fastafile, namefile); } else { parser = new SequenceParser(groupfile, fastafile); } groups = parser->getNamesOfGroups(); } if(processors == 1) { driverGroups(newFastaFile, newNamesFile, newMapFile, 0, groups.size(), groups); } else { createProcessesGroups(newFastaFile, newNamesFile, newMapFile, groups); } if (countfile != "") { mergeGroupCounts(newCountFile, newNamesFile, newFastaFile); delete cparser; }else { delete parser; //run unique.seqs for deconvolute results string inputString = "fasta=" + newFastaFile; if (namefile != "") { inputString += ", name=" + newNamesFile; } m->mothurOutEndLine(); m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; Command* uniqueCommand = new DeconvoluteCommand(inputString); uniqueCommand->execute(); map<string, vector<string> > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->renameFile(filenames["fasta"][0], newFastaFile); m->renameFile(filenames["name"][0], newNamesFile); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete alignment; return 0; } m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run pre.cluster."); m->mothurOutEndLine(); }else { if (processors != 1) { m->mothurOut("When using running without group information mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } if (namefile != "") { readNameFile(); } //reads fasta file and return number of seqs int numSeqs = readFASTA(); //fills alignSeqs and makes all seqs active if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete alignment; return 0; } if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); delete alignment; return 0; } if (diffs > length) { m->mothurOut("Error: diffs is greater than your sequence length."); m->mothurOutEndLine(); delete alignment; return 0; } int count = process(newMapFile); outputNames.push_back(newMapFile); outputTypes["map"].push_back(newMapFile); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete alignment; return 0; } m->mothurOut("Total number of sequences before precluster was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine(); m->mothurOut("pre.cluster removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); if (countfile != "") { newNamesFile = newCountFile; } printData(newFastaFile, newNamesFile, ""); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete alignment; return 0; } delete alignment; m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } } return 0; } catch(exception& e) { m->errorOut(e, "PreClusterCommand", "execute"); exit(1); } }