//**********************************************************************************************************************
// Applies the filter to every file listed in fastafileNames.
//
// For each input file this:
//   1. frees the chunk descriptors in `lines` left over from the previous file,
//   2. builds the output name via getOutputFileName("fasta", ...),
//   3. obtains file positions (from savedPositions when it is non-empty, otherwise computed here),
//   4. turns the positions into `lines` entries and runs the filter, either directly through
//      driverRunFilter (one processor) or through createProcessesRunFilter (several),
//   5. records the output file in outputNames and outputTypes["fasta"].
//
// Side effects: numSeqs is reset to 0 and then accumulates the per-file counts; `processors` may be lowered.
//
// Returns 0 when every file was processed, 1 when m->control_pressed was set (either by the workers or because
// a file could not be split into sections here).
// Any std::exception is reported through m->errorOut and terminates the program with exit(1).
int FilterSeqsCommand::filterSequences() {
	try {

		numSeqs = 0;

		for (size_t s = 0; s < fastafileNames.size(); s++) {

			//release the chunk descriptors that belong to the previous file
			for (size_t i = 0; i < lines.size(); i++) {  delete lines[i];  }
			lines.clear();

			map<string, string> variables;
			variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s]));
			string filteredFasta = getOutputFileName("fasta", variables);

			//reuse the positions saved earlier when there are any, otherwise compute them now
			vector<unsigned long long> positions;
			if (savedPositions.size() != 0) { positions = savedPositions[s]; }
			else {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				positions = m->divideFile(fastafileNames[s], processors);
#else
				if(processors != 1){
					int numFastaSeqs = 0;
					positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs);
					if (numFastaSeqs < processors) { processors = numFastaSeqs; }
					//a file without sequences would leave processors at 0 and the split below divides by it
					if (processors < 1) { processors = 1; }
				}
#endif
			}

#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
			//each pair of neighbouring positions delimits one section of the file.
			//the bound is written as i + 1 < size() because size() - 1 wraps around on an empty vector.
			for (size_t i = 0; i + 1 < positions.size(); i++) {
				lines.push_back(new linePair(positions[i], positions[i + 1]));
			}

			//both branches below dereference lines[0]; without any section there is nothing valid to hand over.
			//flag the run as aborted and use the same return code as the interrupt check further down.
			if (lines.empty()) {
				m->mothurOut("[ERROR]: unable to divide " + fastafileNames[s] + " into sections to filter, quitting.\n");
				m->control_pressed = true;
				return 1;
			}

			if(processors == 1){
				int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
				numSeqs += numFastaSeqs;
			}else{
				int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta);
				numSeqs += numFastaSeqs;
			}

			if (m->control_pressed) {  return 1; }
#else
			if(processors == 1){
				lines.push_back(new linePair(0, 1000));
				int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
				numSeqs += numFastaSeqs;
			}else {
				//positions[startIndex] is read below, so an empty position list cannot be processed
				if (positions.empty()) {
					m->mothurOut("[ERROR]: unable to divide " + fastafileNames[s] + " into sections to filter, quitting.\n");
					m->control_pressed = true;
					return 1;
				}

				int numFastaSeqs = positions.size()-1;

				//figure out how many sequences each worker has to process; the last worker takes the remainder
				int numSeqsPerProcessor = numFastaSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; 	}
					//here the second linePair field carries a sequence count, not a file offset
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}

				numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta);
				numSeqs += numFastaSeqs;
			}

			if (m->control_pressed) {  return 1; }
#endif

			outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta);
		}

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "FilterSeqsCommand", "filterSequences");
		exit(1);
	}
}
//**********************************************************************************************************************
// Filters `filename` with `processors` workers and merges their output into filteredFastaName.
//
// F                 - string passed through to driverRunFilter as its first argument (filterSequences passes `filter`).
// filename          - input file; on the fork path it is also the prefix of every temp file that gets created.
// filteredFastaName - final output file.
//
// Returns the count produced by the calling process/thread plus the counts reported by every worker.
//
// Fork path (first #if branch): processors-1 children are forked; child number `process` handles lines[process],
// the parent handles lines[0]. Children hand their count back through a ".num.temp" file.
// Thread path (#else branch): processors-1 threads are created for lines[0..processors-2] and the calling thread
// handles lines[processors-1].
//
// Relies on `lines` having been filled by the caller with one entry per worker.
int FilterSeqsCommand::createProcessesRunFilter(string F, string filename, string filteredFastaName) {
	try {
        
        int process = 1;
		int num = 0;
		processIDS.clear();
        bool recalc = false; //set when a fork fails and the work has to be re-divided
        
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		
		
		//loop through and create all the processes you want
		//process starts at 1 because the parent itself takes lines[0] further down
		while (process != processors) {
			pid_t pid = fork();
			
			if (pid > 0) {
				processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
				process++;
			}else if (pid == 0){
				//child: filter its own section into a temp file of its own
				//NOTE(review): the parent later rebuilds these names with toString(processIDS[i]); presumably
				//m->mothurGetpid(process) yields the child's pid as a string - confirm
				string filteredFasta = filename + m->mothurGetpid(process) + ".temp";
				num = driverRunFilter(F, filteredFasta, filename, lines[process]);
				
				//pass numSeqs to parent
				ofstream out;
				string tempFile = filename +  m->mothurGetpid(process) + ".num.temp";
				m->openOutputFile(tempFile, out);
				out << num << endl;
				out.close();
				
				exit(0);
			}else { 
				//fork failed: fall back to the number of processes that could be started (children so far plus the parent)
                m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process;
                //stop the children that were already started
                for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                //wait to die
                //NOTE(review): wait() takes a status out-parameter, so `temp` is overwritten with an exit status and
                //the call reaps whichever child ends next rather than processIDS[i] specifically; the loop count
                //still matches the number of children
                for (int i=0;i<processIDS.size();i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }
                //NOTE(review): presumably clears a flag raised as a side effect of the SIGINT sent above - confirm
                m->control_pressed = false;
                //discard whatever the killed children wrote
                for (int i=0;i<processIDS.size();i++) {
                    m->mothurRemove(filename + (toString(processIDS[i]) + ".temp"));
                    m->mothurRemove(filename + (toString(processIDS[i]) + ".num.temp"));
                }
                recalc = true;
                break;
			}
		}
        
        if (recalc) {
            //test line, also set recalc to true.
            //for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false;  for (int i=0;i<processIDS.size();i++) {m->mothurRemove(filename + (toString(processIDS[i]) + ".temp"));m->mothurRemove(filename + (toString(processIDS[i]) + ".num.temp"));}processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n");
            
            //redo file divide
            //`processors` was lowered above, so `lines` is rebuilt for the smaller worker count
            for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
            vector<unsigned long long> positions = m->divideFile(filename, processors);
            for (int i = 0; i < (positions.size()-1); i++) {  lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
            
            num = 0;
            processIDS.resize(0);
            process = 1;
            
            //loop through and create all the processes you want
            //second and final attempt: same scheme as the first loop, but a failure here is fatal
            while (process != processors) {
                pid_t pid = fork();
                
                if (pid > 0) {
                    processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                    process++;
                }else if (pid == 0){
                    string filteredFasta = filename + m->mothurGetpid(process) + ".temp";
                    num = driverRunFilter(F, filteredFasta, filename, lines[process]);
                    
                    //pass numSeqs to parent
                    ofstream out;
                    string tempFile = filename +  m->mothurGetpid(process) + ".num.temp";
                    m->openOutputFile(tempFile, out);
                    out << num << endl;
                    out.close();
                    
                    exit(0);
                }else { 
                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                    //NOTE(review): the parent terminates with status 0 although this is an error path, and the
                    //temp files of the killed children are not removed here
                    exit(0);
                }
            }

        }
		
        //the parent processes the first section itself, writing straight to the final output file
        num = driverRunFilter(F, filteredFastaName, filename, lines[0]);
        
		//force parent to wait until all the processes are done
		for (int i=0;i<processIDS.size();i++) { 
			int temp = processIDS[i];
			wait(&temp);
		}	
					
		//collect the results in processIDS order: add each child's count, then append its output and remove its temp files
		for (int i = 0; i < processIDS.size(); i++) {
			ifstream in;
			string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
			m->openInputFile(tempFile, in);
			if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
			in.close(); m->mothurRemove(tempFile);
            
            m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filteredFastaName);
            m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
		}
               
#else
        
        //////////////////////////////////////////////////////////////////////////////////////////////////////
		//Windows version shared memory, so be careful when passing variables through the filterData struct. 
		//Above fork() will clone, so memory is separate, but that's not the case with windows, 
		//Taking advantage of shared memory to allow both threads to add info to F.
		//////////////////////////////////////////////////////////////////////////////////////////////////////
		
		vector<filterRunData*> pDataArray; 
		//NOTE(review): `processors` is not a compile-time constant, so these two arrays are variable-length
		//arrays, which standard C++ does not provide - confirm the target compiler accepts them
		DWORD   dwThreadIdArray[processors-1];
		HANDLE  hThreadArray[processors-1]; 
		
		//Create processor worker threads.
		for( int i=0; i<processors-1; i++){
			
            //thread 0 writes directly to the final file (empty extension); thread i > 0 writes to
            //filteredFastaName + i + ".temp"
            string extension = "";
			if (i != 0) { extension = toString(i) + ".temp"; }
            
			//NOTE(review): the threads receive the member `filter`, while the calling thread below uses the
			//parameter F - confirm both are meant to be the same string
			filterRunData* tempFilter = new filterRunData(filter, filename, (filteredFastaName + extension), m, lines[i]->start, lines[i]->end, alignmentLength, i);
			pDataArray.push_back(tempFilter);
			processIDS.push_back(i);
            
			hThreadArray[i] = CreateThread(NULL, 0, MyRunFilterThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
		}
        
        //the calling thread takes the last section and writes it to its own temp file
        num = driverRunFilter(F, (filteredFastaName + toString(processors-1) + ".temp"), filename, lines[processors-1]);
        
		//Wait until all threads have terminated.
		WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
		
		//Close all thread handles and free memory allocations.
		for(int i=0; i < pDataArray.size(); i++){
			num += pDataArray[i]->count;
            //a thread whose count differs from its `end` field is reported and the run is flagged as aborted.
            //`end` is constructed from lines[i]->end above; filterSequences stores a number of sequences in that
            //field on this platform
            if (pDataArray[i]->count != pDataArray[i]->end) {
                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
            }
            CloseHandle(hThreadArray[i]);
			delete pDataArray[i];
		}
        
        //append the temp files 1..processors-1 to the final file (which already holds thread 0's output) in order
        for (int i = 1; i < processors; i++) {
            m->appendFiles((filteredFastaName + toString(i) + ".temp"), filteredFastaName);
            m->mothurRemove((filteredFastaName + toString(i) + ".temp"));
		}
#endif	
        
        return num;
        
	}
Beispiel #3
0
//**********************************************************************************************************************
// Applies the filter to every file listed in fastafileNames.
//
// For each input file this frees the previous file's entries in `lines`, builds the output name via
// getOutputFileName("fasta", ...), filters the file, and records the output in outputNames and
// outputTypes["fasta"].
//
// With USE_MPI: rank 0 computes the sequence positions with setFilePosFasta and sends them to every other rank;
// each rank then filters its own slice through driverMPIRun and the non-root ranks report "done" back to rank 0.
// `processors` is overwritten with the size of MPI_COMM_WORLD.
//
// Without USE_MPI: positions come from savedPositions when it is non-empty, otherwise they are computed here;
// the file is then filtered through driverRunFilter (one processor) or createProcessesRunFilter (several).
//
// Side effects: numSeqs is reset to 0 and then accumulates the per-file counts; `processors` may be changed.
//
// Returns 0 when every file was processed. On interruption (m->control_pressed) the MPI path returns 0 and the
// non-MPI path returns 1. The non-MPI path also sets m->control_pressed and returns 1 when a file cannot be
// split into sections.
// Any std::exception is reported through m->errorOut and terminates the program with exit(1).
int FilterSeqsCommand::filterSequences() {
	try {

		numSeqs = 0;

		for (size_t s = 0; s < fastafileNames.size(); s++) {

			//release the chunk descriptors that belong to the previous file
			for (size_t i = 0; i < lines.size(); i++) {  delete lines[i];  }
			lines.clear();

			map<string, string> variables;
			variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s]));
			string filteredFasta = getOutputFileName("fasta", variables);
#ifdef USE_MPI
			int pid = 0;
			int numSeqsPerProcessor = 0;
			int num = 0;
			int tag = 2001;
			vector<unsigned long long> MPIPos;

			MPI_Status status;
			MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
			MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are

			MPI_File outMPI;
			MPI_File inMPI;
			int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
			int inMode=MPI_MODE_RDONLY;

			//writable, NUL-terminated copies of the two paths; the buffers are sized from the strings themselves
			//so a long path cannot run past the end of a fixed-size array
			vector<char> outFilename(filteredFasta.begin(), filteredFasta.end());
			outFilename.push_back('\0');

			vector<char> inFileName(fastafileNames[s].begin(), fastafileNames[s].end());
			inFileName.push_back('\0');

			MPI_File_open(MPI_COMM_WORLD, &inFileName[0], inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
			MPI_File_open(MPI_COMM_WORLD, &outFilename[0], outMode, MPI_INFO_NULL, &outMPI);

			if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }

			if (pid == 0) { //you are the root process

				MPIPos = m->setFilePosFasta(fastafileNames[s], num); //fills MPIPos, returns numSeqs
				numSeqs += num;

				//send file positions to all processes.
				//the elements are unsigned long long, so the matching MPI datatype is MPI_UNSIGNED_LONG_LONG;
				//MPI_LONG describes `long`, which is a different type and not the same width on every platform
				for(int i = 1; i < processors; i++) {
					MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
					MPI_Send(&MPIPos[0], (num+1), MPI_UNSIGNED_LONG_LONG, i, tag, MPI_COMM_WORLD);
				}

				//figure out how many sequences you have to do; the last rank takes the remainder
				numSeqsPerProcessor = num / processors;
				int startIndex =  pid * numSeqsPerProcessor;
				if(pid == (processors - 1)){	numSeqsPerProcessor = num - pid * numSeqsPerProcessor; 	}

				//do your part
				driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);

				if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }

				//wait on children
				for(int i = 1; i < processors; i++) {
					char buf[5];
					MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);
				}

			}else { //you are a child process
				MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
				MPIPos.resize(num+1);
				numSeqs += num;
				//datatype must match the one used by the root's send above
				MPI_Recv(&MPIPos[0], (num+1), MPI_UNSIGNED_LONG_LONG, 0, tag, MPI_COMM_WORLD, &status);

				//figure out how many sequences you have to filter; the last rank takes the remainder
				numSeqsPerProcessor = num / processors;
				int startIndex =  pid * numSeqsPerProcessor;
				if(pid == (processors - 1)){	numSeqsPerProcessor = num - pid * numSeqsPerProcessor; 	}

				//do your part
				driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);

				if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }

				char buf[5];
				strcpy(buf, "done");

				//tell parent you are done.
				MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
			}

			MPI_File_close(&outMPI);
			MPI_File_close(&inMPI);
			MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case

#else

			//reuse the positions saved earlier when there are any, otherwise compute them now
			vector<unsigned long long> positions;
			if (savedPositions.size() != 0) { positions = savedPositions[s]; }
			else {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				positions = m->divideFile(fastafileNames[s], processors);
#else
				if(processors != 1){
					int numFastaSeqs = 0;
					positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs);
					if (positions.size() < processors) { processors = positions.size(); }
					//an empty position list would leave processors at 0 and the split below divides by it
					if (processors < 1) { processors = 1; }
				}
#endif
			}

#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
			//each pair of neighbouring positions delimits one section of the file.
			//the bound is written as i + 1 < size() because size() - 1 wraps around on an empty vector.
			for (size_t i = 0; i + 1 < positions.size(); i++) {
				lines.push_back(new linePair(positions[i], positions[i + 1]));
			}

			//both branches below dereference lines[0]; without any section there is nothing valid to hand over.
			//flag the run as aborted and use the same return code as the interrupt check further down.
			if (lines.empty()) {
				m->mothurOut("[ERROR]: unable to divide " + fastafileNames[s] + " into sections to filter, quitting.\n");
				m->control_pressed = true;
				return 1;
			}

			if(processors == 1){
				int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
				numSeqs += numFastaSeqs;
			}else{
				int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta);
				numSeqs += numFastaSeqs;
			}

			if (m->control_pressed) {  return 1; }
#else
			if(processors == 1){
				lines.push_back(new linePair(0, 1000));
				int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
				numSeqs += numFastaSeqs;
			}else {
				//positions[startIndex] is read below, so an empty position list cannot be processed
				if (positions.empty()) {
					m->mothurOut("[ERROR]: unable to divide " + fastafileNames[s] + " into sections to filter, quitting.\n");
					m->control_pressed = true;
					return 1;
				}

				int numFastaSeqs = positions.size()-1;

				//figure out how many sequences each worker has to process; the last worker takes the remainder
				int numSeqsPerProcessor = numFastaSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; 	}
					//here the second linePair field carries a sequence count, not a file offset
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}

				numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta);
				numSeqs += numFastaSeqs;
			}

			if (m->control_pressed) {  return 1; }
#endif
#endif
			outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta);
		}

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "FilterSeqsCommand", "filterSequences");
		exit(1);
	}
}