int FilterSeqsCommand::filterSequences() { try { numSeqs = 0; for (int s = 0; s < fastafileNames.size(); s++) { for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])); string filteredFasta = getOutputFileName("fasta", variables); vector<unsigned long long> positions; if (savedPositions.size() != 0) { positions = savedPositions[s]; } else { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafileNames[s], processors); #else if(processors != 1){ int numFastaSeqs = 0; positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } } #endif } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } if(processors == 1){ int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else{ int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #else if(processors == 1){ lines.push_back(new linePair(0, 1000)); int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else { int numFastaSeqs = positions.size()-1; //positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #endif outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta); } return 0; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "filterSequences"); exit(1); } }
int FilterSeqsCommand::filterSequences() { try { numSeqs = 0; for (int s = 0; s < fastafileNames.size(); s++) { for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])); string filteredFasta = getOutputFileName("fasta", variables); #ifdef USE_MPI int pid, numSeqsPerProcessor, num; int tag = 2001; vector<unsigned long long>MPIPos; MPI_Status status; MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_File outMPI; MPI_File inMPI; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; char outFilename[1024]; strcpy(outFilename, filteredFasta.c_str()); char inFileName[1024]; strcpy(inFileName, fastafileNames[s].c_str()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; } if (pid == 0) { //you are the root process MPIPos = m->setFilePosFasta(fastafileNames[s], num); //fills MPIPos, returns numSeqs numSeqs += num; //send file positions to all processes for(int i = 1; i < processors; i++) { MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD); MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD); } //figure out how many sequences you have to do numSeqsPerProcessor = num / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = num - pid * numSeqsPerProcessor; } //do your part driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; } //wait on chidren for(int i = 1; i < processors; i++) { char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); } }else { //you are a child process MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); MPIPos.resize(num+1); numSeqs += num; MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); //figure out how many sequences you have to align numSeqsPerProcessor = num / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = num - pid * numSeqsPerProcessor; } //align your part driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; } char buf[5]; strcpy(buf, "done"); //tell parent you are done. MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } MPI_File_close(&outMPI); MPI_File_close(&inMPI); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else vector<unsigned long long> positions; if (savedPositions.size() != 0) { positions = savedPositions[s]; } else { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafileNames[s], processors); #else if(processors != 1){ int numFastaSeqs = 0; positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); if (positions.size() < processors) { processors = positions.size(); } } #endif } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } if(processors == 1){ int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else{ int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #else if(processors == 1){ lines.push_back(new linePair(0, 1000)); int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else { int numFastaSeqs = positions.size()-1; //positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #endif #endif outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta); } return 0; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "filterSequences"); exit(1); } }