//void alignDriver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename, vector<long long>& numFlipped,MothurOut* m, string align, float match, float misMatch, float gapOpen, float gapExtend, float threshold, bool flip, AlignmentDB* templateDB, string search, long long& count) { long long AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName, string filename, vector<long long>& numFlipped) { try { vector<linePair> lines; vector<unsigned long long> positions; #if defined NON_WINDOWS positions = util.divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } #else long long numFastaSeqs = 0; positions = util.setFilePosFasta(filename, numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; m->mothurOut("Reducing processors to " + toString(numFastaSeqs) + ".\n"); } //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); } #endif //create array of worker threads vector<thread*> workerThreads; vector<alignStruct*> data; long long num = 0; for (int i = 0; i < numFlipped.size(); i++) { numFlipped[i] = 0; } time_t start, end; time(&start); NastReport nast; string nastHeaders = nast.getHeaders(); ofstream out; util.openOutputFile(reportFileName, out); out << nastHeaders; out.close(); auto synchronizedOutputAlignFile = std::make_shared<SynchronizedOutputFile>(alignFileName); auto synchronizedOutputReportFile = std::make_shared<SynchronizedOutputFile>(reportFileName, true); auto synchronizedOutputAccnosFile = std::make_shared<SynchronizedOutputFile>(accnosFName); for (int i = 0; i < processors-1; i++) { OutputWriter* threadAlignWriter = new OutputWriter(synchronizedOutputAlignFile); OutputWriter* threadReportWriter = new OutputWriter(synchronizedOutputReportFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedOutputAccnosFile); alignStruct* dataBundle = new alignStruct(lines[i+1], threadAlignWriter, threadReportWriter, threadAccnosWriter, filename, align, match, misMatch, gapOpen, gapExtend, threshold, flip, templateDB, search); data.push_back(dataBundle); workerThreads.push_back(new thread(alignDriver, dataBundle)); } OutputWriter* threadAlignWriter = new OutputWriter(synchronizedOutputAlignFile); OutputWriter* threadReportWriter = new OutputWriter(synchronizedOutputReportFile); OutputWriter* threadAccnosWriter = new OutputWriter(synchronizedOutputAccnosFile); alignStruct* dataBundle = new alignStruct(lines[0], threadAlignWriter, threadReportWriter, threadAccnosWriter, filename, align, match, misMatch, gapOpen, gapExtend, threshold, flip, templateDB, search); alignDriver(dataBundle); numFlipped[0] = dataBundle->flippedResults[0]; numFlipped[1] = dataBundle->flippedResults[1]; num = dataBundle->numSeqs; for (int i = 0; i < processors-1; i++) { workerThreads[i]->join(); num += data[i]->numSeqs; numFlipped[0] += data[i]->flippedResults[0]; numFlipped[1] += data[i]->flippedResults[1]; delete data[i]->alignWriter; delete data[i]->reportWriter; delete data[i]->accnosWriter; delete data[i]; delete workerThreads[i]; } synchronizedOutputAlignFile->close(); synchronizedOutputReportFile->close(); synchronizedOutputAccnosFile->close(); delete threadAlignWriter; delete threadAccnosWriter; delete threadReportWriter; delete dataBundle; time(&end); m->mothurOut("It took " + toString(difftime(end, start)) + " secs to align " + toString(num) + " sequences.\n\n"); return num; } catch(exception& e) { m->errorOut(e, "AlignCommand", "createProcesses"); exit(1); } }
int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector<unsigned long long>& MPIPos){ try { string outputString = ""; MPI_Status statusReport; MPI_Status statusAlign; MPI_Status statusAccnos; MPI_Status status; int pid; MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are NastReport report; if (pid == 0) { outputString = report.getHeaders(); int length = outputString.length(); char* buf = new char[length]; memcpy(buf, outputString.c_str(), length); MPI_File_write_shared(reportFile, buf, length, MPI_CHAR, &statusReport); delete buf; } Alignment* alignment; int longestBase = templateDB->getLongestBase(); if(align == "gotoh") { alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase); } else if(align == "needleman") { alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } else if(align == "blast") { alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch); } else if(align == "noalign") { alignment = new NoAlign(); } else { m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman."); m->mothurOutEndLine(); alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase); } for(int i=0;i<num;i++){ if (m->control_pressed) { delete alignment; return 0; } //read next sequence int length = MPIPos[start+i+1] - MPIPos[start+i]; char* buf4 = new char[length]; //memcpy(buf4, outputString.c_str(), length); MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status); string tempBuf = buf4; delete buf4; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } istringstream iss (tempBuf,istringstream::in); Sequence* candidateSeq = new Sequence(iss); report.setCandidate(candidateSeq); int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); int numBasesNeeded = origNumBases * threshold; if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file if (candidateSeq->getUnaligned().length() > alignment->getnRows()) { alignment->resize(candidateSeq->getUnaligned().length()+1); } Sequence temp = templateDB->findClosestSequence(candidateSeq); Sequence* templateSeq = &temp; float searchScore = templateDB->getSearchScore(); Nast* nast = new Nast(alignment, candidateSeq, templateSeq); Sequence* copy; Nast* nast2; bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below //since nast does not make a copy of hte sequence passed, and it is used by the reporter below //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place //so this bool tells you if you need to delete it //if there is a possibility that this sequence should be reversed if (candidateSeq->getNumBases() < numBasesNeeded) { string wasBetter = ""; //if the user wants you to try the reverse if (flip) { //get reverse compliment copy = new Sequence(candidateSeq->getName(), originalUnaligned); copy->reverseComplement(); //rerun alignment Sequence temp2 = templateDB->findClosestSequence(copy); Sequence* templateSeq2 = &temp2; searchScore = templateDB->getSearchScore(); nast2 = new Nast(alignment, copy, templateSeq2); //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement."; }else{ wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence."; delete nast2; delete copy; } } //create accnos file with names outputString = candidateSeq->getName() + wasBetter + "\n"; //send results to parent int length = outputString.length(); char* buf = new char[length]; memcpy(buf, outputString.c_str(), length); MPI_File_write_shared(accnosFile, buf, length, MPI_CHAR, &statusAccnos); delete buf; MPIWroteAccnos = true; } report.setTemplate(templateSeq); report.setSearchParameters(search, searchScore); report.setAlignmentParameters(align, alignment); report.setNastParameters(*nast); outputString = ">" + candidateSeq->getName() + "\n" + candidateSeq->getAligned() + "\n"; //send results to parent int length = outputString.length(); char* buf2 = new char[length]; memcpy(buf2, outputString.c_str(), length); MPI_File_write_shared(alignFile, buf2, length, MPI_CHAR, &statusAlign); delete buf2; outputString = report.getReport(); //send results to parent length = outputString.length(); char* buf3 = new char[length]; memcpy(buf3, outputString.c_str(), length); MPI_File_write_shared(reportFile, buf3, length, MPI_CHAR, &statusReport); delete buf3; delete nast; if (needToDeleteCopy) { delete copy; } } delete candidateSeq; //report progress if((i+1) % 100 == 0){ cout << (toString(i+1)) << endl; } } //report progress if((num) % 100 != 0){ cout << (toString(num)) << endl; } return 1; } catch(exception& e) { m->errorOut(e, "AlignCommand", "driverMPI"); exit(1); } }
//********************************************************************************************************************** void alignDriver(alignStruct* params) { try { NastReport report; ifstream inFASTA; params->util.openInputFile(params->inputFilename, inFASTA); inFASTA.seekg(params->filePos.start); bool done = false; long long count = 0; long long numFlipped_0 = 0; long long numFlipped_1 = 0; //moved this into driver to avoid deep copies in windows paralellized version Alignment* alignment; int longestBase = params->templateDB->getLongestBase(); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: template longest base = " + toString(longestBase) + " \n"); } if(params->alignMethod == "gotoh") { alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, longestBase); } else if(params->alignMethod == "needleman") { alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, longestBase); } else if(params->alignMethod == "blast") { alignment = new BlastAlignment(params->gapOpen, params->gapExtend, params->match, params->misMatch); } else if(params->alignMethod == "noalign") { alignment = new NoAlign(); } else { params->m->mothurOut(params->alignMethod + " is not a valid alignment option. I will run the command using needleman."); params->m->mothurOutEndLine(); alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, longestBase); } while (!done) { if (params->m->getControl_pressed()) { break; } Sequence* candidateSeq = new Sequence(inFASTA); params->util.gobble(inFASTA); report.setCandidate(candidateSeq); int origNumBases = candidateSeq->getNumBases(); string originalUnaligned = candidateSeq->getUnaligned(); int numBasesNeeded = origNumBases * params->threshold; if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) { if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); } alignment->resize(candidateSeq->getUnaligned().length()+2); } float searchScore; Sequence temp = params->templateDB->findClosestSequence(candidateSeq, searchScore); Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned()); Nast* nast = new Nast(alignment, candidateSeq, templateSeq); Sequence* copy; Nast* nast2; bool needToDeleteCopy = false; //this is needed in case you have you enter the ifs below //since nast does not make a copy of hte sequence passed, and it is used by the reporter below //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place //so this bool tells you if you need to delete it //if there is a possibility that this sequence should be reversed if (candidateSeq->getNumBases() < numBasesNeeded) { numFlipped_1++; string wasBetter = ""; //if the user wants you to try the reverse if (params->flip) { //get reverse compliment copy = new Sequence(candidateSeq->getName(), originalUnaligned); copy->reverseComplement(); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: flipping " + candidateSeq->getName() + " \n"); } //rerun alignment Sequence temp2 = params->templateDB->findClosestSequence(copy, searchScore); Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned()); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: closest template " + temp2.getName() + " \n"); } nast2 = new Nast(alignment, copy, templateSeq2); if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: completed Nast2 " + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); } //check if any better if (copy->getNumBases() > candidateSeq->getNumBases()) { candidateSeq->setAligned(copy->getAligned()); //use reverse compliments alignment since its better delete templateSeq; templateSeq = templateSeq2; delete nast; nast = nast2; needToDeleteCopy = true; wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement."; numFlipped_0++; }else{ wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence."; delete nast2; delete templateSeq2; delete copy; } if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: done.\n"); } } //create accnos file with names params->accnosWriter->write(candidateSeq->getName() + wasBetter + "\n"); } report.setTemplate(templateSeq); report.setSearchParameters(params->search, searchScore); report.setAlignmentParameters(params->alignMethod, alignment); report.setNastParameters(*nast); params->alignWriter->write('>' + candidateSeq->getName() + '\n' + candidateSeq->getAligned() + '\n'); params->reportWriter->write(report.getReport()); delete nast; delete templateSeq; if (needToDeleteCopy) { delete copy; } count++; } delete candidateSeq; #if defined NON_WINDOWS unsigned long long pos = inFASTA.tellg(); if ((pos == -1) || (pos >= params->filePos.end)) { break; } #else if (count == params->filePos.end) { break; } #endif //report progress if((count) % 1000 == 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } } //report progress if((count) % 1000 != 0){ params->m->mothurOutJustToScreen(toString(count) + "\n"); } params->numSeqs += count; params->flippedResults[0] += numFlipped_0; params->flippedResults[1] += numFlipped_1; delete alignment; inFASTA.close(); } catch(exception& e) { params->m->errorOut(e, "AlignCommand", "driver"); exit(1); } }