int AlignCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, util.getRandomNumber(), true); if (m->getControl_pressed()) { outputTypes.clear(); return 0; } time_t start = time(NULL); m->mothurOut("\nAligning sequences from " + fastafile + " ...\n" ); if (outputDir == "") { outputDir += util.hasPath(fastafile); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile)); string alignFileName = getOutputFileName("fasta", variables); string reportFileName = getOutputFileName("alignreport", variables); string accnosFileName = getOutputFileName("accnos", variables); bool hasAccnos = true; vector<long long> numFlipped; numFlipped.push_back(0); //numflipped because reverse was better numFlipped.push_back(0); //total number of sequences with over 50% of bases removed long long numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, fastafile, numFlipped); if (m->getControl_pressed()) { util.mothurRemove(accnosFileName); util.mothurRemove(alignFileName); util.mothurRemove(reportFileName); outputTypes.clear(); return 0; } //delete accnos file if its blank else report to user if (util.isBlank(accnosFileName)) { util.mothurRemove(accnosFileName); hasAccnos = false; } else { m->mothurOut("[WARNING]: " + toString(numFlipped[1]) + " of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well. 
flip=t"); }else{ m->mothurOut("\n[NOTE]: " + toString(numFlipped[0]) + " of your sequences were reversed to produce a better alignment."); } m->mothurOutEndLine(); } outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName); outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName); if (hasAccnos) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); } m->mothurOut("\nIt took " + toString(time(NULL) - start) + " seconds to align " + toString(numFastaSeqs) + " sequences.\n"); //set align file as new current fastafile string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; current->setFastaFile(currentFasta); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i] +"\n"); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AlignCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int SffMultipleCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } vector<string> sffFiles, oligosFiles; readFile(sffFiles, oligosFiles); string thisOutputDir = outputDir; if (thisOutputDir == "") { thisOutputDir = m->hasPath(filename); } string fileroot = thisOutputDir + m->getRootName(m->getSimpleName(filename)); map<string, string> variables; variables["[filename]"] = fileroot; string fasta = getOutputFileName("fasta",variables); string name = getOutputFileName("name",variables); string group = getOutputFileName("group",variables); if (m->control_pressed) { return 0; } if (sffFiles.size() < processors) { processors = sffFiles.size(); } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) #else //trim.flows, shhh.flows cannot handle multiple processors for windows. processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n"); #endif if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); } else { createProcesses(sffFiles, oligosFiles, fasta, name, group); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (append) { outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta); m->setFastaFile(fasta); outputNames.push_back(name); outputTypes["name"].push_back(name); m->setNameFile(name); if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); } } m->setProcessors(toString(processors)); //report output filenames m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } 
catch(exception& e) { m->errorOut(e, "SffMultipleCommand", "execute"); exit(1); } }
int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){ try { RarefactionCurveData* rcd = new RarefactionCurveData(); for(int i=0;i<displays.size();i++){ rcd->registerDisplay(displays[i]); } //convert freq percentage to number int increment = 1; if (percentFreq < 1.0) { increment = numSeqs * percentFreq; } else { increment = percentFreq; } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ driver(rcd, increment, nIters); }else{ vector<int> procIters; int numItersPerProcessor = nIters / processors; //divide iters between processes for (int i = 0; i < processors; i++) { if(i == processors - 1){ numItersPerProcessor = nIters - i * numItersPerProcessor; } procIters.push_back(numItersPerProcessor); } createProcesses(procIters, rcd, increment, nIters); } #else driver(rcd, increment, nIters); #endif for(int i=0;i<displays.size();i++){ displays[i]->close(); } delete rcd; return 0; } catch(exception& e) { m->errorOut(e, "Rarefact", "getCurve"); exit(1); } }
// Computes the weighted scores for every pair of groups.
//   t - tree to score; its count table is passed on to createProcesses()
//   p - number of processors to split the pairwise comparisons across
//   o - output directory, stored in the outputDir member
// Returns the scores produced by createProcesses(), or the (cleared) data member when the
// run was cancelled before any work started.
EstOutput Weighted::getValues(Tree* t, int p, string o) {
    try {
        data.clear(); //clear out old values
        processors = p;
        outputDir = o;

        CountTable* ct = t->getCountTable();
        int numGroups = m->getNumGroups();

        if (m->control_pressed) { return data; }

        // fetch the group names once instead of twice per pair
        const vector<string> groupNames = m->getGroups();

        //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
        vector< vector<string> > namesOfGroupCombos;
        for (int i=0; i<numGroups; i++) {
            for (int l = 0; l < i; l++) {
                vector<string> groups;
                groups.push_back(groupNames[i]);
                groups.push_back(groupNames[l]);
                namesOfGroupCombos.push_back(groups);
            }
        }

        // start from a clean partition, as Parsimony::getValues does
        lines.clear();

        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            // Integer division already truncates, so each processor but the last gets the
            // floor of the even share; the last one takes everything that is left.
            if (remainingProcessors != 1) { numPairs = remainingPairs / remainingProcessors; }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        lines.clear();

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Weighted", "getValues");
        exit(1);
    }
}
//*************************************************************************************************************** int DegapSeqsCommand::execute(){ try{ if (abort) { if (calledHelp) { return 0; } return 2; } m->mothurOut("Degapping sequences from " + fastafile + " ...\n" ); string tempOutputDir = outputDir; if (outputDir == "") { tempOutputDir = util.hasPath(fastafile); } map<string, string> variables; variables["[filename]"] = tempOutputDir + util.getRootName(util.getSimpleName(fastafile)); string degapFile = getOutputFileName("fasta", variables); outputNames.push_back(degapFile); outputTypes["fasta"].push_back(degapFile); long start = time(NULL); int numSeqs = createProcesses(fastafile, degapFile); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to degap " + toString(numSeqs) + " sequences.\n\n"); if (m->getControl_pressed()) { for (int j = 0; j < outputNames.size(); j++) { util.mothurRemove(outputNames[j]); } return 0; } //set fasta file as new current fastafile string currentName = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); } } m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "DegapSeqsCommand", "execute"); exit(1); } }
int TreeGroupCommand::makeSimsShared(InputData& input, SharedRAbundVectors*& lookup, CountTable& ct) { try { if (subsample) { if (subsampleSize == -1) { //user has not set size, set size = smallest samples size subsampleSize = lookup->getNumSeqsSmallestGroup(); }else { lookup->removeGroups(subsampleSize); Groups = lookup->getNamesGroups(); Treenames = Groups; } if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command.\n"); m->setControl_pressed(true); return 0; } } numGroups = lookup->size(); set<string> processedLabels; set<string> userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->getControl_pressed()) { delete lookup; return 1; } if(allLines == 1 || labels.count(lookup->getLabel()) == 1){ m->mothurOut(lookup->getLabel()+"\n"); createProcesses(lookup, ct); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); } if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup->getLabel(); delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup->getLabel()+"\n"); createProcesses(lookup, ct); processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel()); //restore real lastlabel to save below lookup->setLabels(saveLabel); } lastLabel = lookup->getLabel(); //get next line to process delete lookup; lookup = input.getSharedRAbundVectors(); } if (m->getControl_pressed()) { return 1; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { m->mothurOut(". I will use " + lastLabel + ".\n"); needToRun = true; }else { m->mothurOut(". 
Please refer to " + lastLabel + ".\n"); } } //run last label if you need to if (needToRun ) { delete lookup; lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup->getLabel()+"\n"); createProcesses(lookup, ct); delete lookup; } return 0; } catch(exception& e) { m->errorOut(e, "TreeGroupCommand", "makeSimsShared"); exit(1); } }
int ChimeraCheckCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } for (int i = 0; i < fastaFileNames.size(); i++) { m->mothurOut("Checking sequences from " + fastaFileNames[i] + " ..." ); m->mothurOutEndLine(); int start = time(NULL); string thisNameFile = ""; if (nameFileNames.size() != 0) { thisNameFile = nameFileNames[i]; } chimera = new ChimeraCheckRDP(fastaFileNames[i], templatefile, thisNameFile, svg, increment, ksize, outputDir); if (m->control_pressed) { delete chimera; return 0; } if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[i]); }//if user entered a file with a path then preserve it map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])); string outputFileName = getOutputFileName("chimera", variables); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); #ifdef USE_MPI int pid, numSeqsPerProcessor; int tag = 2001; vector<unsigned long long> MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); MPI_File inMPI; MPI_File outMPI; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; char outFilename[1024]; strcpy(outFilename, outputFileName.c_str()); char inFileName[1024]; strcpy(inFileName, fastaFileNames[i].c_str()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; } if (pid == 0) { //you are the root process MPIPos = m->setFilePosFasta(fastaFileNames[i], numSeqs); //fills MPIPos, returns numSeqs //send file positions to all processes for(int j = 1; j < processors; j++) 
{ MPI_Send(&numSeqs, 1, MPI_INT, j, tag, MPI_COMM_WORLD); MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, j, tag, MPI_COMM_WORLD); } //figure out how many sequences you have to align numSeqsPerProcessor = numSeqs / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; } //wait on chidren for(int j = 1; j < processors; j++) { char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, j, tag, MPI_COMM_WORLD, &status); } }else{ //you are a child process MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); MPIPos.resize(numSeqs+1); MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); //figure out how many sequences you have to align numSeqsPerProcessor = numSeqs / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; } //tell parent you are done. 
char buf[5]; strcpy(buf, "done"); MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } //close files MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //break up file #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) vector<unsigned long long> positions = m->divideFile(fastaFileNames[i], processors); for (int s = 0; s < (positions.size()-1); s++) { lines.push_back(new linePair(positions[s], positions[(s+1)])); } if(processors == 1){ numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int j = 0; j < lines.size(); j++) { delete lines[j]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; } }else{ processIDS.resize(0); numSeqs = createProcesses(outputFileName, fastaFileNames[i]); rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); //append output files for(int j=1;j<processors;j++){ m->appendFiles((outputFileName + toString(processIDS[j]) + ".temp"), outputFileName); m->mothurRemove((outputFileName + toString(processIDS[j]) + ".temp")); } if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); delete chimera; return 0; } } #else lines.push_back(new linePair(0, 1000)); numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]); if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; } #endif #endif delete chimera; for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); m->mothurOutEndLine(); m->mothurOut("This method does not 
determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraCheckCommand", "execute"); exit(1); } }
//*************************************************************************************************************** int Bellerophon::getChimeras() { try { //create breaking points vector<int> midpoints; midpoints.resize(iters, window); for (int i = 1; i < iters; i++) { midpoints[i] = midpoints[i-1] + increment; } #ifdef USE_MPI int pid, numSeqsPerProcessor; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); numSeqsPerProcessor = iters / processors; //each process hits this only once unsigned long long startPos = pid * numSeqsPerProcessor; if(pid == processors - 1){ numSeqsPerProcessor = iters - pid * numSeqsPerProcessor; } lines.push_back(linePair(startPos, numSeqsPerProcessor)); //fill pref with scores driverChimeras(midpoints, lines[0]); if (m->control_pressed) { return 0; } //each process must send its parts back to pid 0 if (pid == 0) { //receive results for (int j = 1; j < processors; j++) { vector<string> MPIBestSend; for (int i = 0; i < numSeqs; i++) { if (m->control_pressed) { return 0; } MPI_Status status; //receive string int length; MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status); char* buf = new char[length]; MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status); string temp = buf; if (temp.length() > length) { temp = temp.substr(0, length); } delete buf; MPIBestSend.push_back(temp); } fillPref(j, MPIBestSend); if (m->control_pressed) { return 0; } } }else { //takes best window for each sequence and turns Preference to string that can be parsed by pid 0. //played with this a bit, but it may be better to try user-defined datatypes with set string lengths?? 
vector<string> MPIBestSend = getBestWindow(lines[0]); pref.clear(); //send your result to parent for (int i = 0; i < numSeqs; i++) { if (m->control_pressed) { return 0; } int bestLength = MPIBestSend[i].length(); char* buf = new char[bestLength]; memcpy(buf, MPIBestSend[i].c_str(), bestLength); MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD); MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD); delete buf; } MPIBestSend.clear(); } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //divide breakpoints between processors #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) if(processors == 1){ lines.push_back(linePair(0, iters)); //fill pref with scores driverChimeras(midpoints, lines[0]); }else{ int numSeqsPerProcessor = iters / processors; for (int i = 0; i < processors; i++) { unsigned long long startPos = i * numSeqsPerProcessor; if(i == processors - 1){ numSeqsPerProcessor = iters - i * numSeqsPerProcessor; } lines.push_back(linePair(startPos, numSeqsPerProcessor)); } createProcesses(midpoints); } #else lines.push_back(linePair(0, iters)); ///fill pref with scores driverChimeras(midpoints, lines[0]); #endif #endif return 0; } catch(exception& e) { m->errorOut(e, "Bellerophon", "getChimeras"); exit(1); } }
// Computes parsimony scores for every pair of groups plus, unless there is exactly one pair,
// one additional combination containing all groups.
//   t - tree to score; its count table supplies group names when the user selected none
//   p - number of processors the comparisons are divided across
//   o - output directory, stored in the outputDir member
// Returns the scores produced by createProcesses().
EstOutput Parsimony::getValues(Tree* t, int p, string o) {
    try {
        processors = p;
        outputDir = o;

        CountTable* ct = t->getCountTable();

        //if the users enters no groups then give them the score of all groups
        vector<string> mGroups = m->getGroups();
        int numGroups = mGroups.size();

        //calculate number of comparsions
        int numComp = 0;
        vector< vector<string> > namesOfGroupCombos;
        for (int first = 0; first < numGroups; first++) {
            for (int second = 0; second < first; second++) {
                vector<string> pairOfGroups;
                pairOfGroups.push_back(mGroups[first]);
                pairOfGroups.push_back(mGroups[second]);
                namesOfGroupCombos.push_back(pairOfGroups);
                numComp++;
            }
        }

        //numComp+1 for AB, AC, BC, ABC
        if (numComp != 1) {
            vector<string> allGroups;
            if (numGroups != 0) {
                // the user's own selection
                for (int i = 0; i < mGroups.size(); i++) {
                    allGroups.push_back(mGroups[i]);
                }
            }else {
                //get score for all users groups, skipping the "xxx" entry
                vector<string> tGroups = ct->getNamesOfGroups();
                for (int i = 0; i < tGroups.size(); i++) {
                    if (tGroups[i] == "xxx") { continue; }
                    allGroups.push_back(tGroups[i]);
                }
            }
            namesOfGroupCombos.push_back(allGroups);
        }

        lines.clear();

        int pairsLeft = namesOfGroupCombos.size();
        int nextStart = 0;
        int processorsLeft = processors;
        while (processorsLeft > 0) {
            // the last processor takes everything that remains; the operand of ceil() is an
            // integer quotient, so the others get the truncated share
            int share = pairsLeft;
            if (processorsLeft != 1) { share = ceil(pairsLeft / processorsLeft); }

            lines.push_back(linePair(nextStart, share)); //startIndex, numPairs
            nextStart += share;
            pairsLeft -= share;
            processorsLeft--;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Parsimony", "getValues");
        exit(1);
    }
}
int GetMetaCommunityCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } InputData input(sharedfile, "sharedfile"); vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set<string> processedLabels; set<string> userLabels = labels; //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); createProcesses(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); } if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { string saveLabel = lookup[0]->getLabel(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); createProcesses(lookup); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); //restore real lastlabel to save below lookup[0]->setLabel(saveLabel); } lastLabel = lookup[0]->getLabel(); //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } if (m->control_pressed) { return 0; } //get next line to process lookup = input.getSharedRAbundVectors(); } if (m->control_pressed) { return 0; } //output error messages about any remaining user labels set<string>::iterator it; bool needToRun = false; for (it = userLabels.begin(); it != userLabels.end(); it++) { m->mothurOut("Your file does not include the label " + *it); if (processedLabels.count(lastLabel) != 1) { 
m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine(); needToRun = true; }else { m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine(); } } //run last label if you need to if (needToRun == true) { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } lookup = input.getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); createProcesses(lookup); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } //output files created by command m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "GetMetaCommunityCommand", "execute"); exit(1); } }
int AlignCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, m->getRandomNumber(), true); for (int s = 0; s < candidateFileNames.size(); s++) { if (m->control_pressed) { outputTypes.clear(); return 0; } m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine(); if (outputDir == "") { outputDir += m->hasPath(candidateFileNames[s]); } map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])); string alignFileName = getOutputFileName("fasta", variables); if(align == "noalign") {alignFileName="/dev/null";} string reportFileName = getOutputFileName("alignreport", variables); string accnosFileName = getOutputFileName("accnos", variables); bool hasAccnos = true; int numFastaSeqs = 0; for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); int start = time(NULL); vector<unsigned long long> positions; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(candidateFileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(new linePair(0, 1000)); }else { positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif if(processors == 1){ numFastaSeqs = driver(lines[0], alignFileName, 
reportFileName, accnosFileName, candidateFileNames[s]); }else{ numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); } if (m->control_pressed) { m->mothurRemove(accnosFileName); m->mothurRemove(alignFileName); m->mothurRemove(reportFileName); outputTypes.clear(); return 0; } //delete accnos file if its blank else report to user if (m->isBlank(accnosFileName)) { m->mothurRemove(accnosFileName); hasAccnos = false; } else { m->mothurOut("[WARNING]: Some of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } m->mothurOutEndLine(); } outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName); outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName); if (hasAccnos) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); } m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); } //set align file as new current fastafile string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AlignCommand", "execute"); exit(1); } }
int DistanceCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } int startTime = time(NULL); //save number of new sequence numNewFasta = alignDB.getNumSeqs(); //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB if ((oldfastafile != "") && (column != "")) { if (!(sanityCheck())) { return 0; } } if (m->control_pressed) { return 0; } int numSeqs = alignDB.getNumSeqs(); cutoff += 0.005; if (!alignDB.sameLength()) { m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; } string outputFile; map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile)); if (output == "lt") { //does the user want lower triangle phylip formatted file variables["[outputtag]"] = "phylip"; outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); //output numSeqs to phylip formatted dist file }else if (output == "column") { //user wants column format outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); //so we don't accidentally overwrite if (outputFile == column) { string tempcolumn = column + ".old"; rename(column.c_str(), tempcolumn.c_str()); } m->mothurRemove(outputFile); }else { //assume square variables["[outputtag]"] = "square"; outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } #ifdef USE_MPI int pid, start, end; int tag = 2001; MPI_Status status; MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are //each process gets where it should start and stop in the file if (output != "square") { start = int (sqrt(float(pid)/float(processors)) * numSeqs); end = int (sqrt(float(pid+1)/float(processors)) * numSeqs); }else{ start 
= int ((float(pid)/float(processors)) * numSeqs); end = int ((float(pid+1)/float(processors)) * numSeqs); } if (output == "column") { MPI_File outMPI; int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; //char* filename = new char[outputFile.length()]; //memcpy(filename, outputFile.c_str(), outputFile.length()); char filename[1024]; strcpy(filename, outputFile.c_str()); MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI); //delete filename; if (pid == 0) { //you are the root process //do your part string outputMyPart; driverMPI(start, end, outMPI, cutoff); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } //wait on chidren for(int i = 1; i < processors; i++) { if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); } }else { //you are a child process //do your part driverMPI(start, end, outMPI, cutoff); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; strcpy(buf, "done"); //tell parent you are done. 
MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } MPI_File_close(&outMPI); }else { //lower triangle format if (pid == 0) { //you are the root process //do your part string outputMyPart; unsigned long long mySize; if (output != "square"){ driverMPI(start, end, outputFile, mySize); } else { driverMPI(start, end, outputFile, mySize, output); } if (m->control_pressed) { outputTypes.clear(); return 0; } int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; // MPI_File outMPI; MPI_File inMPI; //char* filename = new char[outputFile.length()]; //memcpy(filename, outputFile.c_str(), outputFile.length()); char filename[1024]; strcpy(filename, outputFile.c_str()); MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); //delete filename; //wait on chidren for(int b = 1; b < processors; b++) { unsigned long long fileSize; if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); string outTemp = outputFile + toString(b) + ".temp"; char* buf = new char[outTemp.length()]; memcpy(buf, outTemp.c_str(), outTemp.length()); MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); delete buf; int count = 0; while (count < fileSize) { char buf2[1]; MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status); MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status); count += 1; } MPI_File_close(&inMPI); //deleted on close } MPI_File_close(&outMPI); }else { //you are a child process //do your part unsigned long long size; if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); } else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); } if (m->control_pressed) { return 0; } //tell parent you are done. 
MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD); } } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //if you don't need to fork anything if(processors == 1){ if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } else { driver(0, numSeqs, outputFile, "square"); } }else{ //you have multiple processors createProcesses(outputFile, numSeqs); } //#else //ifstream inFASTA; //if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } //else { driver(0, numSeqs, outputFile, "square"); } //#endif #endif if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &pid); if (pid == 0) { //only one process should output to screen #endif //if (output == "square") { convertMatrix(outputFile); } ifstream fileHandle; fileHandle.open(outputFile.c_str()); if(fileHandle) { m->gobble(fileHandle); if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. 
This can result if there are no distances below your cutoff."); m->mothurOutEndLine(); } } //append the old column file to the new one if ((oldfastafile != "") && (column != "")) { //we had to rename the column file so we didnt overwrite above, but we want to keep old name if (outputFile == column) { string tempcolumn = column + ".old"; m->appendFiles(tempcolumn, outputFile); m->mothurRemove(tempcolumn); }else{ m->appendFiles(outputFile, column); m->mothurRemove(outputFile); outputFile = column; } if (outputDir != "") { string newOutputName = outputDir + m->getSimpleName(outputFile); rename(outputFile.c_str(), newOutputName.c_str()); m->mothurRemove(outputFile); outputFile = newOutputName; } } #ifdef USE_MPI } #endif if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } //set phylip file as new current phylipfile string current = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); } } //set column file as new current columnfile itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(outputFile); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - startTime) + " seconds to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); if (m->isTrue(compress)) { m->mothurOut("Compressing..."); m->mothurOutEndLine(); m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine(); system(("gzip -v " + outputFile).c_str()); outputNames.push_back(outputFile + ".gz"); }else { outputNames.push_back(outputFile); } return 0; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "execute"); exit(1); } }
int ClassifySeqsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } string outputMethodTag = method; if(method == "wang"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts); } else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); } else if(method == "zap"){ outputMethodTag = search + "_" + outputMethodTag; if (search == "kmer") { classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); } else { classify = new AlignTree(templateFileName, taxonomyFileName, cutoff); } } else { m->mothurOut(search + " is not a valid method option. I will run the command using wang."); m->mothurOutEndLine(); classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts); } if (m->control_pressed) { delete classify; return 0; } for (int s = 0; s < fastaFileNames.size(); s++) { m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." 
); m->mothurOutEndLine(); string baseTName = m->getSimpleName(taxonomyFileName); //set rippedTaxName to string RippedTaxName = ""; bool foundDot = false; for (int i = baseTName.length()-1; i >= 0; i--) { if (foundDot && (baseTName[i] != '.')) { RippedTaxName = baseTName[i] + RippedTaxName; } else if (foundDot && (baseTName[i] == '.')) { break; } else if (!foundDot && (baseTName[i] == '.')) { foundDot = true; } } //if (RippedTaxName != "") { RippedTaxName += "."; } if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); } map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])); variables["[tag]"] = RippedTaxName; variables["[tag2]"] = outputMethodTag; string newTaxonomyFile = getOutputFileName("taxonomy", variables); string newaccnosFile = getOutputFileName("accnos", variables); string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp"; string taxSummary = getOutputFileName("taxsummary", variables); if ((method == "knn") && (search == "distance")) { string DistName = getOutputFileName("matchdist", variables); classify->setDistName(DistName); outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName); } outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile); outputNames.push_back(taxSummary); outputTypes["taxsummary"].push_back(taxSummary); int start = time(NULL); int numFastaSeqs = 0; for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); vector<unsigned long long> positions; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastaFileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(new linePair(0, 1000)); }else { positions = 
m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif if(processors == 1){ numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); }else{ numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); } if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur reversed some your sequences for a better classification. If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile); }else { m->mothurRemove(newaccnosFile); } m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); start = time(NULL); //read namefile if(namefile != "") { m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush(); nameMap.clear(); //remove old names m->readNames(namefileNames[s], nameMap); m->mothurOut(" Done."); m->mothurOutEndLine(); } //output taxonomy with the unclassified bins added ifstream inTax; m->openInputFile(newTaxonomyFile, inTax); ofstream outTax; string unclass = newTaxonomyFile + ".unclass.temp"; m->openOutputFile(unclass, outTax); //get maxLevel from phylotree so you know how many 'unclassified's to add int maxLevel = classify->getMaxLevel(); //read taxfile - this reading and rewriting is done to preserve the confidence scores. 
string name, taxon; string group = ""; GroupMap* groupMap = NULL; CountTable* ct = NULL; PhyloSummary* taxaSum; if (hasCount) { ct = new CountTable(); ct->readTable(countfileNames[s], true, false); taxaSum = new PhyloSummary(ct, relabund, printlevel); }else { if (groupfile != "") { group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); } taxaSum = new PhyloSummary(groupMap, relabund, printlevel); } while (!inTax.eof()) { if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; } inTax >> name >> taxon; m->gobble(inTax); string newTax = m->addUnclassifieds(taxon, maxLevel, probs); outTax << name << '\t' << newTax << endl; if (namefile != "") { itNames = nameMap.find(name); if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1); }else{ for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); //add it as many times as there are identical seqs } itNames->second.clear(); nameMap.erase(itNames->first); } }else { taxaSum->addSeqToTree(name, newTax); } } inTax.close(); outTax.close(); m->mothurRemove(newTaxonomyFile); rename(unclass.c_str(), newTaxonomyFile.c_str()); if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; } //print summary file ofstream outTaxTree; m->openOutputFile(taxSummary, outTaxTree); taxaSum->print(outTaxTree, output); outTaxTree.close(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; m->mothurRemove(tempTaxonomyFile); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to 
create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); } delete classify; m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set taxonomy file as new current taxonomyfile string current = ""; itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); } } current = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } } return 0; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "execute"); exit(1); } }
/**************************************************************************************************/
// Builds the list of group combinations to score for tree t, splits that list across the
// requested processors and returns whatever createProcesses() produces for it.
//
//   t - tree to score; its count table is passed on to createProcesses()
//   p - number of processors to spread the combinations over (stored in processors)
//   o - output directory (stored in outputDir)
//
// Combinations: one entry per pair of the groups returned by m->getGroups(), plus - unless there
// is exactly one pair - one entry holding all groups (or every count-table group except "xxx"
// when no groups are set).
//
// A std::exception is reported through errorOut() and the process exits with 1.
EstOutput Unweighted::getValues(Tree* t, int p, string o) {
    try {
        processors = p;
        outputDir = o;

        CountTable* ct = t->getCountTable();

        //if the user enters no groups then give them the score of all groups
        int numGroups = m->getNumGroups();
        vector<string> userGroups = m->getGroups(); //fetched once instead of once per element

        //calculate number of comparisons
        int numComp = 0;
        vector< vector<string> > namesOfGroupCombos;
        for (int r = 0; r < numGroups; r++) {
            for (int l = 0; l < r; l++) {
                numComp++;
                vector<string> groups;
                groups.push_back(userGroups[r]);
                groups.push_back(userGroups[l]);
                namesOfGroupCombos.push_back(groups);
            }
        }

        if (numComp != 1) {
            vector<string> groups;
            if (numGroups == 0) {
                //get score for all users groups
                vector<string> tableGroups = ct->getNamesOfGroups();
                for (int i = 0; i < tableGroups.size(); i++) {
                    if (tableGroups[i] != "xxx") { groups.push_back(tableGroups[i]); }
                }
            }else {
                for (int i = 0; i < numGroups; i++) { groups.push_back(userGroups[i]); }
            }
            namesOfGroupCombos.push_back(groups);
        }

        //hand every processor a contiguous run of combinations: (startIndex, numPairs)
        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) {
                //true ceiling of remainingPairs / remainingProcessors; calling ceil() on the
                //already-truncated integer quotient rounds nothing and leaves the whole
                //remainder (or everything, when pairs < processors) to the last processor
                numPairs = (remainingPairs + remainingProcessors - 1) / remainingProcessors;
            }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }

        data = createProcesses(t, namesOfGroupCombos, ct);

        lines.clear();

        return data;
    }
    catch(exception& e) {
        m->errorOut(e, "Unweighted", "getValues");
        exit(1);
    }
}
int ClassifySeqsCommand::execute(){ try { if (abort) { if (calledHelp) { return 0; } return 2; } string outputMethodTag = method; if(method == "wang"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion()); } else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, util.getRandomNumber(), current->getVersion()); } else if(method == "zap"){ outputMethodTag = search + "_" + outputMethodTag; if (search == "kmer") { classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); } else { classify = new AlignTree(templateFileName, taxonomyFileName, cutoff); } } else { m->mothurOut(search + " is not a valid method option. I will run the command using wang."); m->mothurOutEndLine(); classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion()); } if (m->getControl_pressed()) { delete classify; return 0; } m->mothurOut("Classifying sequences from " + fastafile + " ...\n" ); string baseTName = util.getSimpleName(taxonomyFileName); //set rippedTaxName to string RippedTaxName = ""; bool foundDot = false; for (int i = baseTName.length()-1; i >= 0; i--) { if (foundDot && (baseTName[i] != '.')) { RippedTaxName = baseTName[i] + RippedTaxName; } else if (foundDot && (baseTName[i] == '.')) { break; } else if (!foundDot && (baseTName[i] == '.')) { foundDot = true; } } if (outputDir == "") { outputDir += util.hasPath(fastafile); } map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile)); variables["[tag]"] = RippedTaxName; variables["[tag2]"] = outputMethodTag; string newTaxonomyFile = getOutputFileName("taxonomy", variables); string newaccnosFile = getOutputFileName("accnos", variables); string tempTaxonomyFile = 
outputDir + util.getRootName(util.getSimpleName(fastafile)) + "taxonomy.temp"; string taxSummary = getOutputFileName("taxsummary", variables); if ((method == "knn") && (search == "distance")) { string DistName = getOutputFileName("matchdist", variables); classify->setDistName(DistName); outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName); } outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile); outputNames.push_back(taxSummary); outputTypes["taxsummary"].push_back(taxSummary); long start = time(NULL); int numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastafile); if (!util.isBlank(newaccnosFile)) { m->mothurOut("\n[WARNING]: mothur reversed some your sequences for a better classification. If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences.\n"); outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile); }else { util.mothurRemove(newaccnosFile); } m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.\n\n"); start = time(NULL); //read namefile map<string, vector<string> > nameMap; map<string, vector<string> >::iterator itNames; if(namefile != "") { m->mothurOut("Reading " + namefile + "..."); cout.flush(); nameMap.clear(); //remove old names util.readNames(namefile, nameMap); m->mothurOut(" Done.\n"); } //output taxonomy with the unclassified bins added ifstream inTax; util.openInputFile(newTaxonomyFile, inTax); ofstream outTax; string unclass = newTaxonomyFile + ".unclass.temp"; util.openOutputFile(unclass, outTax); //get maxLevel from phylotree so you know how many 'unclassified's to add int maxLevel = classify->getMaxLevel(); //read taxfile - this reading and rewriting is done to preserve the confidence scores. 
string name, taxon; GroupMap* groupMap = NULL; CountTable* ct = NULL; PhyloSummary* taxaSum; if (hasCount) { ct = new CountTable(); ct->readTable(countfile, true, false); taxaSum = new PhyloSummary(ct, relabund, printlevel); }else { if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); } taxaSum = new PhyloSummary(groupMap, relabund, printlevel); } while (!inTax.eof()) { if (m->getControl_pressed()) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete classify; return 0; } inTax >> name; util.gobble(inTax); taxon = util.getline(inTax); util.gobble(inTax); string newTax = util.addUnclassifieds(taxon, maxLevel, probs); outTax << name << '\t' << newTax << endl; if (namefile != "") { itNames = nameMap.find(name); if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file please correct.\n"); exit(1); }else{ //add it as many times as there are identical seqs for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); } itNames->second.clear(); nameMap.erase(itNames->first); } }else { taxaSum->addSeqToTree(name, newTax); } } inTax.close(); outTax.close(); util.mothurRemove(newTaxonomyFile); util.renameFile(unclass, newTaxonomyFile); if (m->getControl_pressed()) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { util.mothurRemove(outputNames[i]); } delete classify; return 0; } //print summary file ofstream outTaxTree; util.openOutputFile(taxSummary, outTaxTree); taxaSum->print(outTaxTree, output); outTaxTree.close(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; util.mothurRemove(tempTaxonomyFile); delete classify; m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to create the summary 
file for " + toString(numFastaSeqs) + " sequences.\n\n"); m->mothurOut("\nOutput File Names: \n"); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); //set taxonomy file as new current taxonomyfile string currentName = ""; itTypes = outputTypes.find("taxonomy"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } } currentName = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } } return 0; } catch(exception& e) { m->errorOut(e, "ClassifySeqsCommand", "execute"); exit(1); } }
int ChimeraPintailCommand::execute(){ try{ if (abort == true) { if (calledHelp) { return 0; } return 2; } for (int s = 0; s < fastaFileNames.size(); s++) { m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine(); int start = time(NULL); //set user options if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); } //check for quantile to save the time string baseName = templatefile; if (templatefile == "saved") { baseName = rdb->getSavedReference(); } string tempQuan = ""; if ((!filter) && (maskfile == "")) { tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan"; }else if ((!filter) && (maskfile != "")) { tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan"; }else if ((filter) && (maskfile != "")) { tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan"; }else if ((filter) && (maskfile == "")) { tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan"; } ifstream FileTest(tempQuan.c_str()); if(FileTest){ bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion()); if (GoodFile) { m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close(); } }else { string tryPath = m->getDefaultPath(); string tempQuan = ""; if ((!filter) && (maskfile == "")) { tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan"; }else if ((!filter) && (maskfile != "")) { tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan"; }else if ((filter) && (maskfile != "")) { tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." 
+ m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan"; }else if ((filter) && (maskfile == "")) { tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan"; } ifstream FileTest2(tempQuan.c_str()); if(FileTest2){ bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion()); if (GoodFile) { m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close(); } } } chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir); if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it string outputFileName, accnosFileName; map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])); if (maskfile != "") { variables["[tag]"] = m->getSimpleName(m->getRootName(maskfile)); } outputFileName = getOutputFileName("chimera", variables); accnosFileName = getOutputFileName("accnos", variables); if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; } templateSeqsLength = chimera->getLength(); #ifdef USE_MPI int pid, numSeqsPerProcessor; int tag = 2001; vector<unsigned long long> MPIPos; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); MPI_File inMPI; MPI_File outMPI; MPI_File outMPIAccnos; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; char outFilename[1024]; strcpy(outFilename, outputFileName.c_str()); char outAccnosFilename[1024]; 
strcpy(outAccnosFilename, accnosFileName.c_str()); char inFileName[1024]; strcpy(inFileName, fastaFileNames[s].c_str()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; } if (pid == 0) { //you are the root process MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs //send file positions to all processes for(int i = 1; i < processors; i++) { MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); } //figure out how many sequences you have to align numSeqsPerProcessor = numSeqs / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; } //do your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; } }else{ //you are a child process MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); MPIPos.resize(numSeqs+1); MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); //figure out how many sequences you have to align numSeqsPerProcessor = numSeqs / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = 
numSeqs - pid * numSeqsPerProcessor; } //do your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; } } //close files MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //break up file #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } if(processors == 1){ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } }else{ processIDS.resize(0); numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str()); //append output files for(int i=1;i<processors;i++){ m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName); m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp")); } //append output files for(int i=1;i<processors;i++){ m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName); m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp")); } if (m->control_pressed) { 
m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } } #else lines.push_back(new linePair(0, 1000)); numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName); if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } #endif #endif delete chimera; for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName); outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); } //set accnos file as new current accnosfile string current = ""; itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChimeraPintailCommand", "execute"); exit(1); } }
int AlignCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, rand()); for (int s = 0; s < candidateFileNames.size(); s++) { if (m->control_pressed) { outputTypes.clear(); return 0; } m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine(); if (outputDir == "") { outputDir += m->hasPath(candidateFileNames[s]); } string alignFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align"; string reportFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align.report"; string accnosFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "flip.accnos"; bool hasAccnos = true; int numFastaSeqs = 0; for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); int start = time(NULL); #ifdef USE_MPI int pid, numSeqsPerProcessor; int tag = 2001; vector<unsigned long long> MPIPos; MPIWroteAccnos = false; MPI_Status status; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_Comm_size(MPI_COMM_WORLD, &processors); MPI_File inMPI; MPI_File outMPIAlign; MPI_File outMPIReport; MPI_File outMPIAccnos; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; char outAlignFilename[1024]; strcpy(outAlignFilename, alignFileName.c_str()); char outReportFilename[1024]; strcpy(outReportFilename, reportFileName.c_str()); char outAccnosFilename[1024]; strcpy(outAccnosFilename, accnosFileName.c_str()); char inFileName[1024]; strcpy(inFileName, candidateFileNames[s].c_str()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outAlignFilename, outMode, MPI_INFO_NULL, &outMPIAlign); MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport); MPI_File_open(MPI_COMM_WORLD, 
outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; } if (pid == 0) { //you are the root process MPIPos = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs //send file positions to all processes for(int i = 1; i < processors; i++) { MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); } //figure out how many sequences you have to align numSeqsPerProcessor = numFastaSeqs / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; } for (int i = 1; i < processors; i++) { bool tempResult; MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status); if (tempResult != 0) { MPIWroteAccnos = true; } } }else{ //you are a child process MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); MPIPos.resize(numFastaSeqs+1); MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); //figure out how many sequences you have to align numSeqsPerProcessor = numFastaSeqs / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; } //align your part driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); 
MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; } MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); } //close files MPI_File_close(&inMPI); MPI_File_close(&outMPIAlign); MPI_File_close(&outMPIReport); MPI_File_close(&outMPIAccnos); //delete accnos file if blank if (pid == 0) { //delete accnos file if its blank else report to user if (MPIWroteAccnos) { m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } m->mothurOutEndLine(); }else { //MPI_Info info; //MPI_File_delete(outAccnosFilename, info); hasAccnos = false; m->mothurRemove(accnosFileName); } } #else vector<unsigned long long> positions; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) positions = m->divideFile(candidateFileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } #else if (processors == 1) { lines.push_back(new linePair(0, 1000)); }else { positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } } #endif if(processors == 1){ numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); }else{ numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); } if (m->control_pressed) { m->mothurRemove(accnosFileName); m->mothurRemove(alignFileName); 
m->mothurRemove(reportFileName); outputTypes.clear(); return 0; } //delete accnos file if its blank else report to user if (m->isBlank(accnosFileName)) { m->mothurRemove(accnosFileName); hasAccnos = false; } else { m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + "."); if (!flip) { m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); }else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); } m->mothurOutEndLine(); } #endif #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &pid); if (pid == 0) { //only one process should output to screen #endif outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName); outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName); if (hasAccnos) { outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName); } #ifdef USE_MPI } #endif m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); } //set align file as new current fastafile string currentFasta = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "AlignCommand", "execute"); exit(1); } }
//********************************************************************************************************************** int SparccCommand::process(vector<SharedRAbundVector*>& lookup){ try { cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); vector<vector<float> > sharedVector; vector<string> otuNames = m->currentSharedBinLabels; //fill sharedVector to pass to CalcSparcc for (int i = 0; i < lookup.size(); i++) { vector<int> abunds = lookup[i]->getAbundances(); vector<float> temp; for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); } sharedVector.push_back(temp); } int numOTUs = (int)sharedVector[0].size(); int numGroups = lookup.size(); map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = lookup[0]->getLabel(); string relAbundFileName = getOutputFileName("sparccrelabund", variables); ofstream relAbundFile; m->openOutputFile(relAbundFileName, relAbundFile); outputNames.push_back(relAbundFileName); outputTypes["sparccrelabund"].push_back(relAbundFileName); relAbundFile << "OTU\taveRelAbund\n"; for(int i=0;i<numOTUs;i++){ if (m->control_pressed) { relAbundFile.close(); return 0; } double relAbund = 0.0000; for(int j=0;j<numGroups;j++){ relAbund += sharedVector[j][i]/(double)lookup[j]->getNumSeqs(); } relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl; } relAbundFile.close(); CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod); vector<vector<float> > origCorrMatrix = originalData.getRho(); string correlationFileName = getOutputFileName("corr", variables); ofstream correlationFile; m->openOutputFile(correlationFileName, correlationFile); outputNames.push_back(correlationFileName); outputTypes["corr"].push_back(correlationFileName); correlationFile.setf(ios::fixed, ios::floatfield); correlationFile.setf(ios::showpoint); for(int i=0;i<numOTUs;i++){ correlationFile << '\t' << otuNames[i]; 
} correlationFile << endl; for(int i=0;i<numOTUs;i++){ correlationFile << otuNames[i]; for(int j=0;j<numOTUs;j++){ correlationFile << '\t' << origCorrMatrix[i][j]; } correlationFile << endl; } if(numPermutations != 0){ vector<vector<float> > pValues = createProcesses(sharedVector, origCorrMatrix); if (m->control_pressed) { return 0; } string pValueFileName = getOutputFileName("pvalue", variables); ofstream pValueFile; m->openOutputFile(pValueFileName, pValueFile); outputNames.push_back(pValueFileName); outputTypes["pvalue"].push_back(pValueFileName); pValueFile.setf(ios::fixed, ios::floatfield); pValueFile.setf(ios::showpoint); for(int i=0;i<numOTUs;i++){ pValueFile << '\t' << otuNames[i]; } pValueFile << endl; for(int i=0;i<numOTUs;i++){ pValueFile << otuNames[i]; for(int j=0;j<numOTUs;j++){ pValueFile << '\t' << pValues[i][j]; } pValueFile << endl; } } return 0; } catch(exception& e) { m->errorOut(e, "SparccCommand", "process"); exit(1); } }
int ChopSeqsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } map<string, string> variables; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)); string outputFileName = getOutputFileName("fasta", variables); outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName); string outputFileNameAccnos = getOutputFileName("accnos", variables); string fastafileTemp = ""; if (qualfile != "") { fastafileTemp = outputFileName + ".qualFile.Positions.temp"; } vector<unsigned long long> positions; vector<linePair> lines; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafile, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else int numSeqs = 0; positions = m->setFilePosFasta(fastafile, numSeqs); if (numSeqs < processors) { processors = numSeqs; } //figure out how many sequences you have to process int numSeqsPerProcessor = numSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif bool wroteAccnos = false; if(processors == 1) { wroteAccnos = driver(lines[0], fastafile, outputFileName, outputFileNameAccnos, fastafileTemp); } else { wroteAccnos = createProcesses(lines, fastafile, outputFileName, outputFileNameAccnos, fastafileTemp); } if (m->control_pressed) { return 0; } if (qualfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(qualfile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile)); string outputQualFileName = getOutputFileName("qfile", 
variables); outputNames.push_back(outputQualFileName); outputTypes["qfile"].push_back(outputQualFileName); processQual(outputQualFileName, fastafileTemp); m->mothurRemove(fastafileTemp); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if (wroteAccnos) { outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos); //use remove.seqs to create new name, group and count file if ((countfile != "") || (namefile != "") || (groupfile != "")) { string inputString = "accnos=" + outputFileNameAccnos; if (countfile != "") { inputString += ", count=" + countfile; } else{ if (namefile != "") { inputString += ", name=" + namefile; } if (groupfile != "") { inputString += ", group=" + groupfile; } } m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; Command* removeCommand = new RemoveSeqsCommand(inputString); removeCommand->execute(); map<string, vector<string> > filenames = removeCommand->getOutputFiles(); delete removeCommand; m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); if (groupfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)); string outGroup = getOutputFileName("group", variables); m->renameFile(filenames["group"][0], outGroup); outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup); } if (namefile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(namefile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile)); string outName = getOutputFileName("name", variables); m->renameFile(filenames["name"][0], outName); 
outputNames.push_back(outName); outputTypes["name"].push_back(outName); } if (countfile != "") { thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(countfile); } variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile)); string outCount = getOutputFileName("count", variables); m->renameFile(filenames["count"][0], outCount); outputNames.push_back(outCount); outputTypes["count"].push_back(outCount); } } } else { m->mothurRemove(outputFileNameAccnos); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //set fasta file as new current fastafile string current = ""; itTypes = outputTypes.find("fasta"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); } } if (wroteAccnos) { //set accnos file as new current accnosfile itTypes = outputTypes.find("accnos"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); } } itTypes = outputTypes.find("name"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); } } itTypes = outputTypes.find("group"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); } } itTypes = outputTypes.find("count"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); } } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; } catch(exception& e) { m->errorOut(e, "ChopSeqsCommand", "execute"); exit(1); } }