int FilterSeqsCommand::filterSequences() { try { numSeqs = 0; for (int s = 0; s < fastafileNames.size(); s++) { for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])); string filteredFasta = getOutputFileName("fasta", variables); vector<unsigned long long> positions; if (savedPositions.size() != 0) { positions = savedPositions[s]; } else { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafileNames[s], processors); #else if(processors != 1){ int numFastaSeqs = 0; positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); if (numFastaSeqs < processors) { processors = numFastaSeqs; } } #endif } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } if(processors == 1){ int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else{ int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #else if(processors == 1){ lines.push_back(new linePair(0, 1000)); int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else { int numFastaSeqs = positions.size()-1; //positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #endif outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta); } return 0; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "filterSequences"); exit(1); } }
int FilterSeqsCommand::createProcessesRunFilter(string F, string filename, string filteredFastaName) { try { int process = 1; int num = 0; processIDS.clear(); bool recalc = false; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ string filteredFasta = filename + m->mothurGetpid(process) + ".temp"; num = driverRunFilter(F, filteredFasta, filename, lines[process]); //pass numSeqs to parent ofstream out; string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process; for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } //wait to die for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; for (int i=0;i<processIDS.size();i++) { m->mothurRemove(filename + (toString(processIDS[i]) + ".temp")); m->mothurRemove(filename + (toString(processIDS[i]) + ".num.temp")); } recalc = true; break; } } if (recalc) { //test line, also set recalc to true. //for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false; for (int i=0;i<processIDS.size();i++) {m->mothurRemove(filename + (toString(processIDS[i]) + ".temp"));m->mothurRemove(filename + (toString(processIDS[i]) + ".num.temp"));}processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n"); //redo file divide for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); vector<unsigned long long> positions = m->divideFile(filename, processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } num = 0; processIDS.resize(0); process = 1; //loop through and create all the processes you want while (process != processors) { pid_t pid = fork(); if (pid > 0) { processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ string filteredFasta = filename + m->mothurGetpid(process) + ".temp"; num = driverRunFilter(F, filteredFasta, filename, lines[process]); //pass numSeqs to parent ofstream out; string tempFile = filename + m->mothurGetpid(process) + ".num.temp"; m->openOutputFile(tempFile, out); out << num << endl; out.close(); exit(0); }else { m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } exit(0); } } } num = driverRunFilter(F, filteredFastaName, filename, lines[0]); //force parent to wait until all the processes are done for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } for (int i = 0; i < processIDS.size(); i++) { ifstream in; string tempFile = filename + toString(processIDS[i]) + ".num.temp"; m->openInputFile(tempFile, in); if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } in.close(); m->mothurRemove(tempFile); m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filteredFastaName); m->mothurRemove((filename + toString(processIDS[i]) + ".temp")); } #else ////////////////////////////////////////////////////////////////////////////////////////////////////// //Windows version shared memory, so be careful when passing variables through the filterData struct. //Above fork() will clone, so memory is separate, but that's not the case with windows, //Taking advantage of shared memory to allow both threads to add info to F. ////////////////////////////////////////////////////////////////////////////////////////////////////// vector<filterRunData*> pDataArray; DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; //Create processor worker threads. for( int i=0; i<processors-1; i++){ string extension = ""; if (i != 0) { extension = toString(i) + ".temp"; } filterRunData* tempFilter = new filterRunData(filter, filename, (filteredFastaName + extension), m, lines[i]->start, lines[i]->end, alignmentLength, i); pDataArray.push_back(tempFilter); processIDS.push_back(i); hThreadArray[i] = CreateThread(NULL, 0, MyRunFilterThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); } num = driverRunFilter(F, (filteredFastaName + toString(processors-1) + ".temp"), filename, lines[processors-1]); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); //Close all thread handles and free memory allocations. for(int i=0; i < pDataArray.size(); i++){ num += pDataArray[i]->count; if (pDataArray[i]->count != pDataArray[i]->end) { m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; } CloseHandle(hThreadArray[i]); delete pDataArray[i]; } for (int i = 1; i < processors; i++) { m->appendFiles((filteredFastaName + toString(i) + ".temp"), filteredFastaName); m->mothurRemove((filteredFastaName + toString(i) + ".temp")); } #endif return num; }
int FilterSeqsCommand::filterSequences() { try { numSeqs = 0; for (int s = 0; s < fastafileNames.size(); s++) { for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])); string filteredFasta = getOutputFileName("fasta", variables); #ifdef USE_MPI int pid, numSeqsPerProcessor, num; int tag = 2001; vector<unsigned long long>MPIPos; MPI_Status status; MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are MPI_File outMPI; MPI_File inMPI; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; int inMode=MPI_MODE_RDONLY; char outFilename[1024]; strcpy(outFilename, filteredFasta.c_str()); char inFileName[1024]; strcpy(inFileName, fastafileNames[s].c_str()); MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; } if (pid == 0) { //you are the root process MPIPos = m->setFilePosFasta(fastafileNames[s], num); //fills MPIPos, returns numSeqs numSeqs += num; //send file positions to all processes for(int i = 1; i < processors; i++) { MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD); MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD); } //figure out how many sequences you have to do numSeqsPerProcessor = num / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = num - pid * numSeqsPerProcessor; } //do your part driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; } //wait on chidren for(int i = 1; i < processors; i++) { char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); } }else { //you are a child process MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); MPIPos.resize(num+1); numSeqs += num; MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); //figure out how many sequences you have to align numSeqsPerProcessor = num / processors; int startIndex = pid * numSeqsPerProcessor; if(pid == (processors - 1)){ numSeqsPerProcessor = num - pid * numSeqsPerProcessor; } //align your part driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos); if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); return 0; } char buf[5]; strcpy(buf, "done"); //tell parent you are done. MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } MPI_File_close(&outMPI); MPI_File_close(&inMPI); MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else vector<unsigned long long> positions; if (savedPositions.size() != 0) { positions = savedPositions[s]; } else { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) positions = m->divideFile(fastafileNames[s], processors); #else if(processors != 1){ int numFastaSeqs = 0; positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); if (positions.size() < processors) { processors = positions.size(); } } #endif } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } if(processors == 1){ int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else{ int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #else if(processors == 1){ lines.push_back(new linePair(0, 1000)); int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]); numSeqs += numFastaSeqs; }else { int numFastaSeqs = positions.size()-1; //positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); //figure out how many sequences you have to process int numSeqsPerProcessor = numFastaSeqs / processors; for (int i = 0; i < processors; i++) { int startIndex = i * numSeqsPerProcessor; if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); numSeqs += numFastaSeqs; } if (m->control_pressed) { return 1; } #endif #endif outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta); } return 0; } catch(exception& e) { m->errorOut(e, "FilterSeqsCommand", "filterSequences"); exit(1); } }