コード例 #1
0
int SummaryQualCommand::createProcessesCreateSummary(vector<int>& position, vector<int>& averageQ, vector< vector<int> >& scores, string filename) {
	try {
		int process = 1;
		int numSeqs = 0;
		processIDS.clear();
        bool recalc = false;
		
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		
		//loop through and create all the processes you want
		while (process != processors) {
			pid_t pid = fork();
			
			if (pid > 0) {
				processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
				process++;
			}else if (pid == 0){
				numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[process]);
				
				//pass numSeqs to parent
				ofstream out;
				string tempFile = qualfile + m->mothurGetpid(process) + ".num.temp";
				m->openOutputFile(tempFile, out);
				
				out << numSeqs << endl;
				out << position.size() << endl;
				for (int k = 0; k < position.size(); k++)			{		out << position[k] << '\t'; }  out << endl;
				for (int k = 0; k < averageQ.size(); k++)			{		out << averageQ[k] << '\t'; }  out << endl;
				for (int k = 0; k < scores.size(); k++)	{		
					for (int j = 0; j < 41; j++) {
						out << scores[k][j] << '\t'; 
					}
					out << endl;
				}  
				out << endl;
				
				out.close();
				
				exit(0);
			}else { 
                m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process;
                for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                //wait to die
                for (int i=0;i<processIDS.size();i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }
                m->control_pressed = false;
                for (int i=0;i<processIDS.size();i++) {
                    m->mothurRemove(qualfile + (toString(processIDS[i]) + ".num.temp"));
                }
                recalc = true;
                break;
			}
		}
		
        if (recalc) {
            //test line, also set recalc to true.
            //for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false;  for (int i=0;i<processIDS.size();i++) {m->mothurRemove(qualfile + (toString(processIDS[i]) + ".num.temp"));}processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n");
            
            //redo file divide
            lines.clear();
            vector<unsigned long long> positions = m->divideFile(qualfile, processors);
            for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(linePair(positions[i], positions[(i+1)]));	}
            
            numSeqs = 0;
            processIDS.resize(0);
            process = 1;
            position.clear();
            averageQ.clear();
            scores.clear();
            
            //loop through and create all the processes you want
            while (process != processors) {
                pid_t pid = fork();
                
                if (pid > 0) {
                    processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                    process++;
                }else if (pid == 0){
                    numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[process]);
                    
                    //pass numSeqs to parent
                    ofstream out;
                    string tempFile = qualfile + m->mothurGetpid(process) + ".num.temp";
                    m->openOutputFile(tempFile, out);
                    
                    out << numSeqs << endl;
                    out << position.size() << endl;
                    for (int k = 0; k < position.size(); k++)			{		out << position[k] << '\t'; }  out << endl;
                    for (int k = 0; k < averageQ.size(); k++)			{		out << averageQ[k] << '\t'; }  out << endl;
                    for (int k = 0; k < scores.size(); k++)	{
                        for (int j = 0; j < 41; j++) {
                            out << scores[k][j] << '\t';
                        }
                        out << endl;
                    }  
                    out << endl;
                    
                    out.close();
                    
                    exit(0);
                }else { 
                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                    exit(0);
                }
            }
        }
        
		//do your part
		numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]);
		
		//force parent to wait until all the processes are done
		for (int i=0;i<processIDS.size();i++) { 
			int temp = processIDS[i];
			wait(&temp);
		}
		
		//parent reads in and combine Filter info
		for (int i = 0; i < processIDS.size(); i++) {
			string tempFilename = qualfile + toString(processIDS[i]) + ".num.temp";
			ifstream in;
			m->openInputFile(tempFilename, in);
			
			int temp, tempNum;
			in >> tempNum; m->gobble(in); numSeqs += tempNum;
			in >> tempNum; m->gobble(in);
			
			if (position.size() < tempNum) { position.resize(tempNum, 0); }
			if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); }
			if (scores.size() < tempNum) { 
				scores.resize(tempNum); 
				for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); }
			}
			
			for (int k = 0; k < tempNum; k++)			{		in >> temp; position[k]	+= temp;			}		m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{		in >> temp; averageQ[k] += temp; 		}		m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{	
				for (int j = 0; j < 41; j++) {
					in >> temp; scores[k][j] += temp;
					m->gobble(in);
				}	
			}
			
			in.close();
			m->mothurRemove(tempFilename);
		}
		
#else
		//////////////////////////////////////////////////////////////////////////////////////////////////////
		//Windows version shared memory, so be careful when passing variables through the seqSumQualData struct. 
		//Above fork() will clone, so memory is separate, but that's not the case with windows, 
		//Taking advantage of shared memory to pass results vectors.
		//////////////////////////////////////////////////////////////////////////////////////////////////////
		
		vector<seqSumQualData*> pDataArray; 
		DWORD   dwThreadIdArray[processors];
		HANDLE  hThreadArray[processors]; 
		
        bool hasNameMap = false;
        if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
        
		//Create processor worker threads.
		for( int i=0; i<processors; i++ ){
			
			// Allocate memory for thread data.
			seqSumQualData* tempSum = new seqSumQualData(filename, m, lines[i].start, lines[i].end, hasNameMap, nameMap);
			pDataArray.push_back(tempSum);
			processIDS.push_back(i);
        
			hThreadArray[i] = CreateThread(NULL, 0, MySeqSumQualThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
		}
		
		//Wait until all threads have terminated.
		WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
		
		//Close all thread handles and free memory allocations.
		for(int i=0; i < pDataArray.size(); i++){
			numSeqs += pDataArray[i]->numSeqs;
            if (pDataArray[i]->count != pDataArray[i]->end) {
                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
            }
            int tempNum = pDataArray[i]->position.size();
            if (position.size() < tempNum) { position.resize(tempNum, 0); }
			if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); }
			if (scores.size() < tempNum) { 
				scores.resize(tempNum); 
				for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); }
			}
            
            for (int k = 0; k < tempNum; k++)			{		 position[k]    +=  pDataArray[i]->position[k];         }		
			for (int k = 0; k < tempNum; k++)			{		 averageQ[k]    +=  pDataArray[i]->averageQ[k];         }		
			for (int k = 0; k < tempNum; k++)			{	for (int j = 0; j < 41; j++) {  scores[k][j] += pDataArray[i]->scores[k][j];   }	}

			CloseHandle(hThreadArray[i]);
			delete pDataArray[i];
		}
#endif		
		return numSeqs;
	}
	catch(exception& e) {
		m->errorOut(e, "SummaryQualCommand", "createProcessesCreateSummary");
		exit(1);
	}
}
コード例 #2
0
//***************************************************************************************************************
int SummaryQualCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		int start = time(NULL);
		int numSeqs = 0;
		
		vector<int> position;
		vector<int> averageQ;
		vector< vector<int> > scores;
				
		if (m->control_pressed) { return 0; }
		
		if (namefile != "") { nameMap = m->readNames(namefile); }
		else if (countfile != "") {
            CountTable ct;
            ct.readTable(countfile, false, false);
            nameMap = ct.getNameMap();
        }
        
		vector<unsigned long long> positions; 
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		positions = m->divideFile(qualfile, processors);
		for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(linePair(positions[i], positions[(i+1)]));	}
#else	
		if (processors == 1) {
			lines.push_back(linePair(0, 1000)); 
		}else {
			positions = m->setFilePosFasta(qualfile, numSeqs); 
            if (numSeqs < processors) { processors = numSeqs; }
			
			//figure out how many sequences you have to process
			int numSeqsPerProcessor = numSeqs / processors;
			for (int i = 0; i < processors; i++) {
				int startIndex =  i * numSeqsPerProcessor;
				if(i == (processors - 1)){	numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; 	}
				lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
			}
		}
#endif
		
		
		if(processors == 1){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]);  }
		else{  numSeqs = createProcessesCreateSummary(position, averageQ, scores, qualfile);  }
		
		if (m->control_pressed) {  return 0; }
		
		//print summary file
        map<string, string> variables; 
		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qualfile));
		string summaryFile = getOutputFileName("summary",variables);
		printQual(summaryFile, position, averageQ, scores);
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		//output results to screen
		cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
		m->mothurOutEndLine();
		m->mothurOut("Position\tNumSeqs\tAverageQ"); m->mothurOutEndLine();
		for (int i = 0; i < position.size(); i+=100) {
			float average = averageQ[i] / (float) position[i];
			cout << i << '\t' << position[i] << '\t' << average;
			m->mothurOutJustToLog(toString(i) + "\t" + toString(position[i]) + "\t" + toString(average)); m->mothurOutEndLine();
		}
		
		m->mothurOutEndLine();
		m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(summaryFile); m->mothurOutEndLine();	outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
		m->mothurOutEndLine();
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SummaryQualCommand", "execute");
		exit(1);
	}
}
コード例 #3
0
ファイル: seqsummarycommand.cpp プロジェクト: jklynch/mothur
int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile) {
	try {
		int process = 1;
		int num = 0;
		processIDS.clear();
		
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		
		//loop through and create all the processes you want
		while (process != processors) {
			int pid = fork();
			
			if (pid > 0) {
				processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
				process++;
			}else if (pid == 0){
				num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]);
				
				//pass numSeqs to parent
				ofstream out;
				string tempFile = fastafile + toString(getpid()) + ".num.temp";
				m->openOutputFile(tempFile, out);
				
				out << num << endl;
				out << startPosition.size() << endl;
				for (int k = 0; k < startPosition.size(); k++)		{		out << startPosition[k] << '\t'; }  out << endl;
				for (int k = 0; k < endPosition.size(); k++)		{		out << endPosition[k] << '\t'; }  out << endl;
				for (int k = 0; k < seqLength.size(); k++)			{		out << seqLength[k] << '\t'; }  out << endl;
				for (int k = 0; k < ambigBases.size(); k++)			{		out << ambigBases[k] << '\t'; }  out << endl;
				for (int k = 0; k < longHomoPolymer.size(); k++)	{		out << longHomoPolymer[k] << '\t'; }  out << endl;
				
				out.close();
				
				exit(0);
			}else { 
				m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
				for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
				exit(0);
			}
		}
		
		//do your part
		num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile, lines[0]);

		//force parent to wait until all the processes are done
		for (int i=0;i<processIDS.size();i++) { 
			int temp = processIDS[i];
			wait(&temp);
		}
		
		//parent reads in and combine Filter info
		for (int i = 0; i < processIDS.size(); i++) {
			string tempFilename = fastafile + toString(processIDS[i]) + ".num.temp";
			ifstream in;
			m->openInputFile(tempFilename, in);
			
			int temp, tempNum;
			in >> tempNum; m->gobble(in); num += tempNum;
			in >> tempNum; m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{		in >> temp; startPosition.push_back(temp);		}		m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{		in >> temp; endPosition.push_back(temp);		}		m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{		in >> temp; seqLength.push_back(temp);			}		m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{		in >> temp; ambigBases.push_back(temp);			}		m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{		in >> temp; longHomoPolymer.push_back(temp);	}		m->gobble(in);
				
			in.close();
			m->mothurRemove(tempFilename);
			
			m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
			m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
		}
		
#else
		//////////////////////////////////////////////////////////////////////////////////////////////////////
		//Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
		//Above fork() will clone, so memory is separate, but that's not the case with windows, 
		//Taking advantage of shared memory to allow both threads to add info to vectors.
		//////////////////////////////////////////////////////////////////////////////////////////////////////
		
		vector<seqSumData*> pDataArray; 
		DWORD   dwThreadIdArray[processors-1];
		HANDLE  hThreadArray[processors-1]; 
        
		bool hasNameMap = false;
        if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
        
		//Create processor worker threads.
		for( int i=0; i<processors-1; i++ ){
            
            string extension = "";
            if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
			// Allocate memory for thread data.
			seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, hasNameMap, nameMap);
			pDataArray.push_back(tempSum);
			
			//MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
			//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
			hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
		}
		
        //do your part
		num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, (sumFile+toString(processors-1)+".temp"), lines[processors-1]);
        processIDS.push_back(processors-1);

		//Wait until all threads have terminated.
		WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
		
		//Close all thread handles and free memory allocations.
		for(int i=0; i < pDataArray.size(); i++){
			num += pDataArray[i]->count;
            if (pDataArray[i]->count != pDataArray[i]->end) {
                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
            }
            for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) {	startPosition.push_back(pDataArray[i]->startPosition[k]);       }
			for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) {	endPosition.push_back(pDataArray[i]->endPosition[k]);       }
            for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) {	seqLength.push_back(pDataArray[i]->seqLength[k]);       }
            for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) {	ambigBases.push_back(pDataArray[i]->ambigBases[k]);       }
            for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) {	longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]);       }
			CloseHandle(hThreadArray[i]);
			delete pDataArray[i];
		}
    
		//append files
		for(int i=0;i<processIDS.size();i++){
			m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
			m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
		}
#endif		
		return num;
	}
	catch(exception& e) {
		m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
		exit(1);
	}
}
コード例 #4
0
ファイル: seqsummarycommand.cpp プロジェクト: jklynch/mothur
int SeqSummaryCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		//set current fasta to fastafile
		m->setFastaFile(fastafile);
		
        map<string, string> variables; 
		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
		string summaryFile = getOutputFileName("summary",variables);
				
		int numSeqs = 0;
		
		vector<int> startPosition;
		vector<int> endPosition;
		vector<int> seqLength;
		vector<int> ambigBases;
		vector<int> longHomoPolymer;
		
		if (namefile != "") { nameMap = m->readNames(namefile); }
        else if (countfile != "") {
            CountTable ct;
            ct.readTable(countfile, false, false);
            nameMap = ct.getNameMap();
        }
		
		if (m->control_pressed) { return 0; }
			
#ifdef USE_MPI	
				int pid, numSeqsPerProcessor; 
				int tag = 2001;
				int startTag = 1; int endTag = 2; int lengthTag = 3; int baseTag = 4; int lhomoTag = 5;
				int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
				vector<unsigned long long> MPIPos;
				
				MPI_Status status; 
				MPI_Status statusOut;
				MPI_File inMPI; 
				MPI_File outMPI; 
				MPI_Comm_size(MPI_COMM_WORLD, &processors);
				MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
							
				char tempFileName[1024];
				strcpy(tempFileName, fastafile.c_str());
				
				char sumFileName[1024];
				strcpy(sumFileName, summaryFile.c_str());
		
				MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
				MPI_File_open(MPI_COMM_WORLD, sumFileName, outMode, MPI_INFO_NULL, &outMPI);
				
				if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); return 0;  }
				
				if (pid == 0) { //you are the root process
						//print header
						string outputString = "seqname\tstart\tend\tnbases\tambigs\tpolymer\tnumSeqs\n";	
						int length = outputString.length();
						char* buf2 = new char[length];
						memcpy(buf2, outputString.c_str(), length);
					
						MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &statusOut);
						delete buf2;
						
						MPIPos = m->setFilePosFasta(fastafile, numSeqs); //fills MPIPos, returns numSeqs
					
						for(int i = 1; i < processors; i++) { 
							MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
							MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
						}
						
						//figure out how many sequences you have to do
						numSeqsPerProcessor = numSeqs / processors;
						int startIndex =  pid * numSeqsPerProcessor;
						if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
						
						//do your part
						MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
						
				}else { //i am the child process
			
					MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
					MPIPos.resize(numSeqs+1);
					MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
				
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
				
					//do your part
					MPICreateSummary(startIndex, numSeqsPerProcessor, startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, inMPI, outMPI, MPIPos);
				}
				
				MPI_File_close(&inMPI);
				MPI_File_close(&outMPI);
				MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
				
				if (pid == 0) {
					//get the info from the child processes
					for(int i = 1; i < processors; i++) { 
						int size;
						MPI_Recv(&size, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);

						vector<int> temp; temp.resize(size+1);
						
						for(int j = 0; j < 5; j++) { 
						
							MPI_Recv(&temp[0], (size+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status); 
							int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what count this is for
							
							if (receiveTag == startTag) { 
								for (int k = 0; k < size; k++) {		startPosition.push_back(temp[k]);	}
							}else if (receiveTag == endTag) { 
								for (int k = 0; k < size; k++) {		endPosition.push_back(temp[k]);	}
							}else if (receiveTag == lengthTag) { 
								for (int k = 0; k < size; k++) {		seqLength.push_back(temp[k]);	}
							}else if (receiveTag == baseTag) { 
								for (int k = 0; k < size; k++) {		ambigBases.push_back(temp[k]);	}
							}else if (receiveTag == lhomoTag) { 
								for (int k = 0; k < size; k++) {		longHomoPolymer.push_back(temp[k]);	}
							}
						} 
					}

				}else{
				
					//send my counts
					int size = startPosition.size();
					MPI_Send(&size, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
					
					startPosition.push_back(startTag);
					int ierr = MPI_Send(&(startPosition[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					endPosition.push_back(endTag);
					ierr = MPI_Send (&(endPosition[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					seqLength.push_back(lengthTag);
					ierr = MPI_Send(&(seqLength[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					ambigBases.push_back(baseTag);
					ierr = MPI_Send(&(ambigBases[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
					longHomoPolymer.push_back(lhomoTag);
					ierr = MPI_Send(&(longHomoPolymer[0]), (size+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
				}
				
				MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
			vector<unsigned long long> positions; 
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				positions = m->divideFile(fastafile, processors);
				for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
			#else
				positions = m->setFilePosFasta(fastafile, numSeqs); 
                if (positions.size() < processors) { processors = positions.size(); }
		
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			#endif
			

			if(processors == 1){
				numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
			}else{
				numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
			}
			
			if (m->control_pressed) {  return 0; }
#endif
			
		#ifdef USE_MPI
			if (pid == 0) { 
		#endif
		
		sort(startPosition.begin(), startPosition.end());
		sort(endPosition.begin(), endPosition.end());
		sort(seqLength.begin(), seqLength.end());
		sort(ambigBases.begin(), ambigBases.end());
		sort(longHomoPolymer.begin(), longHomoPolymer.end());
		int size = startPosition.size();
		
		//find means
		unsigned long long meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
		meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
		for (int i = 0; i < size; i++) {
			meanStartPosition += startPosition[i];
			meanEndPosition += endPosition[i];
			meanSeqLength += seqLength[i];
			meanAmbigBases += ambigBases[i];
			meanLongHomoPolymer += longHomoPolymer[i];
		}
                
        double meanstartPosition, meanendPosition, meanseqLength, meanambigBases, meanlongHomoPolymer;
                
		meanstartPosition = meanStartPosition / (double) size; meanendPosition = meanEndPosition /(double) size; meanlongHomoPolymer = meanLongHomoPolymer / (double) size; meanseqLength = meanSeqLength / (double) size; meanambigBases = meanAmbigBases /(double) size;
				
		int ptile0_25	= int(size * 0.025);
		int ptile25		= int(size * 0.250);
		int ptile50		= int(size * 0.500);
		int ptile75		= int(size * 0.750);
		int ptile97_5	= int(size * 0.975);
		int ptile100	= size - 1;
		
		//to compensate for blank sequences that would result in startPosition and endPostion equalling -1
		if (startPosition[0] == -1) {  startPosition[0] = 0;	}
		if (endPosition[0] == -1)	{  endPosition[0] = 0;		}
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer\tNumSeqs"); m->mothurOutEndLine();
		m->mothurOut("Minimum:\t" + toString(startPosition[0]) + "\t" + toString(endPosition[0]) + "\t" + toString(seqLength[0]) + "\t" + toString(ambigBases[0]) + "\t" + toString(longHomoPolymer[0]) + "\t" + toString(1)); m->mothurOutEndLine();
		m->mothurOut("2.5%-tile:\t" + toString(startPosition[ptile0_25]) + "\t" + toString(endPosition[ptile0_25]) + "\t" + toString(seqLength[ptile0_25]) + "\t" + toString(ambigBases[ptile0_25]) + "\t"+ toString(longHomoPolymer[ptile0_25]) + "\t" + toString(ptile0_25+1)); m->mothurOutEndLine();
		m->mothurOut("25%-tile:\t" + toString(startPosition[ptile25]) + "\t" + toString(endPosition[ptile25]) + "\t" + toString(seqLength[ptile25]) + "\t" + toString(ambigBases[ptile25]) + "\t" + toString(longHomoPolymer[ptile25]) + "\t" + toString(ptile25+1)); m->mothurOutEndLine();
		m->mothurOut("Median: \t" + toString(startPosition[ptile50]) + "\t" + toString(endPosition[ptile50]) + "\t" + toString(seqLength[ptile50]) + "\t" + toString(ambigBases[ptile50]) + "\t" + toString(longHomoPolymer[ptile50]) + "\t" + toString(ptile50+1)); m->mothurOutEndLine();
		m->mothurOut("75%-tile:\t" + toString(startPosition[ptile75]) + "\t" + toString(endPosition[ptile75]) + "\t" + toString(seqLength[ptile75]) + "\t" + toString(ambigBases[ptile75]) + "\t" + toString(longHomoPolymer[ptile75]) + "\t" + toString(ptile75+1)); m->mothurOutEndLine();
		m->mothurOut("97.5%-tile:\t" + toString(startPosition[ptile97_5]) + "\t" + toString(endPosition[ptile97_5]) + "\t" + toString(seqLength[ptile97_5]) + "\t" + toString(ambigBases[ptile97_5]) + "\t" + toString(longHomoPolymer[ptile97_5]) + "\t" + toString(ptile97_5+1)); m->mothurOutEndLine();
		m->mothurOut("Maximum:\t" + toString(startPosition[ptile100]) + "\t" + toString(endPosition[ptile100]) + "\t" + toString(seqLength[ptile100]) + "\t" + toString(ambigBases[ptile100]) + "\t" + toString(longHomoPolymer[ptile100]) + "\t" + toString(ptile100+1)); m->mothurOutEndLine();
		m->mothurOut("Mean:\t" + toString(meanstartPosition) + "\t" + toString(meanendPosition) + "\t" + toString(meanseqLength) + "\t" + toString(meanambigBases) + "\t" + toString(meanlongHomoPolymer)); m->mothurOutEndLine();

		if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
		else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(startPosition.size())); m->mothurOutEndLine(); }
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(summaryFile); m->mothurOutEndLine();	outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
		m->mothurOutEndLine();
		
		#ifdef USE_MPI
			}
		#endif

        //set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("summary");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSummaryFile(current); }
		}
        
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SeqSummaryCommand", "execute");
		exit(1);
	}
}
コード例 #5
0
 long long SeqSummaryCommand::createProcessesCreateSummary(map<int, long long>& startPosition, map<int, long long>& endPosition, map<int,  long long>& seqLength, map<int,  long long>& ambigBases, map<int,  long long>& longHomoPolymer, string filename, string sumFile) {
	try {
		int process = 1;
		int num = 0;
		processIDS.clear();
        bool recalc = false;
		
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		
		//loop through and create all the processes you want
		while (process != processors) {
			pid_t pid = fork();
			
			if (pid > 0) {
				processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
				process++;
			}else if (pid == 0){
				num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + m->mothurGetpid(process) + ".temp", lines[process]);
				
				//pass numSeqs to parent
				ofstream out;
				string tempFile = fastafile + m->mothurGetpid(process) + ".num.temp";
				m->openOutputFile(tempFile, out);
				
				out << num << endl;
                out << startPosition.size() << endl;
				for (map<int,  long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                out << endPosition.size() << endl;
				for (map<int,  long long>::iterator it = endPosition.begin(); it != endPosition.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                out << seqLength.size() << endl;
				for (map<int,  long long>::iterator it = seqLength.begin(); it != seqLength.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                out << ambigBases.size() << endl;
				for (map<int,  long long>::iterator it = ambigBases.begin(); it != ambigBases.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                out << longHomoPolymer.size() << endl;
				for (map<int,  long long>::iterator it = longHomoPolymer.begin(); it != longHomoPolymer.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
				out.close();
				
				exit(0);
			}else { 
                m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process;
                for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                //wait to die
                for (int i=0;i<processIDS.size();i++) {
                    int temp = processIDS[i];
                    wait(&temp);
                }
                m->control_pressed = false;
                for (int i=0;i<processIDS.size();i++) {
                    m->mothurRemove(fastafile + (toString(processIDS[i]) + ".num.temp"));
                    m->mothurRemove(sumFile + (toString(processIDS[i]) + ".temp"));
                }
                recalc = true;
                break;
			}
		}
		
        if (recalc) {
            //test line, also set recalc to true.
            //for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } for (int i=0;i<processIDS.size();i++) { int temp = processIDS[i]; wait(&temp); } m->control_pressed = false;  for (int i=0;i<processIDS.size();i++) {m->mothurRemove(fastafile + (toString(processIDS[i]) + ".num.temp"));}processors=3; m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(processors) + "\n");
            
            //redo file divide
            for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
            vector<unsigned long long> positions = m->divideFile(filename, processors);
            for (int i = 0; i < (positions.size()-1); i++) {  lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
            
            startPosition.clear();
            endPosition.clear();
            seqLength.clear();
            ambigBases.clear();
            longHomoPolymer.clear();
            
            num = 0;
            processIDS.resize(0);
            process = 1;
            
            //loop through and create all the processes you want
            while (process != processors) {
                pid_t pid = fork();
                
                if (pid > 0) {
                    processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                    process++;
                }else if (pid == 0){
                    num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + m->mothurGetpid(process) + ".temp", lines[process]);
                    
                    //pass numSeqs to parent
                    ofstream out;
                    string tempFile = fastafile + m->mothurGetpid(process) + ".num.temp";
                    m->openOutputFile(tempFile, out);
                    
                    out << num << endl;
                    out << startPosition.size() << endl;
                    for (map<int,  long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                    out << endPosition.size() << endl;
                    for (map<int,  long long>::iterator it = endPosition.begin(); it != endPosition.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                    out << seqLength.size() << endl;
                    for (map<int,  long long>::iterator it = seqLength.begin(); it != seqLength.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                    out << ambigBases.size() << endl;
                    for (map<int,  long long>::iterator it = ambigBases.begin(); it != ambigBases.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                    out << longHomoPolymer.size() << endl;
                    for (map<int,  long long>::iterator it = longHomoPolymer.begin(); it != longHomoPolymer.end(); it++)		{		out << it->first << '\t' << it->second << endl; }
                    out.close();
                    
                    exit(0);
                }else { 
                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
                    exit(0);
                }
            }
        }

        
		//do your part
		num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile, lines[0]);

		//force parent to wait until all the processes are done
		for (int i=0;i<processIDS.size();i++) { 
			int temp = processIDS[i];
			wait(&temp);
		}
		
		//parent reads in and combine Filter info
		for (int i = 0; i < processIDS.size(); i++) {
			string tempFilename = fastafile + toString(processIDS[i]) + ".num.temp";
			ifstream in;
			m->openInputFile(tempFilename, in);
			
            long long  tempNum;
			in >> tempNum; m->gobble(in); num += tempNum;
			in >> tempNum; m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{
                 long long first, second;
                in >> first; m->gobble(in); in >> second; m->gobble(in);
                map<int,  long long>::iterator it = startPosition.find(first);
                if (it == startPosition.end()) { startPosition[first] = second; } //first finding of this start position, set count.
                else { it->second += second; } //add counts
            }
            m->gobble(in);
            in >> tempNum; m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{
                 long long first, second;
                in >> first; m->gobble(in); in >> second; m->gobble(in);
                map<int,  long long>::iterator it = endPosition.find(first);
                if (it == endPosition.end()) { endPosition[first] = second; } //first finding of this end position, set count.
                else { it->second += second; } //add counts
            }
            m->gobble(in);
            in >> tempNum; m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{
                 long long first, second;
                in >> first; m->gobble(in); in >> second; m->gobble(in);
                map<int,  long long>::iterator it = seqLength.find(first);
                if (it == seqLength.end()) { seqLength[first] = second; } //first finding of this end position, set count.
                else { it->second += second; } //add counts
            }
            m->gobble(in);
            in >> tempNum; m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{
                 long long first, second;
                in >> first; m->gobble(in); in >> second; m->gobble(in);
                map<int,  long long>::iterator it = ambigBases.find(first);
                if (it == ambigBases.end()) { ambigBases[first] = second; } //first finding of this end position, set count.
                else { it->second += second; } //add counts
            }
            m->gobble(in);
            in >> tempNum; m->gobble(in);
			for (int k = 0; k < tempNum; k++)			{
                 long long first, second;
                in >> first; m->gobble(in); in >> second; m->gobble(in);
                map<int,  long long>::iterator it = longHomoPolymer.find(first);
                if (it == longHomoPolymer.end()) { longHomoPolymer[first] = second; } //first finding of this end position, set count.
                else { it->second += second; } //add counts
            }
            m->gobble(in);
							
			in.close();
			m->mothurRemove(tempFilename);
			
			m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
			m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
		}
		
#else
		//////////////////////////////////////////////////////////////////////////////////////////////////////
		//Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
		//Above fork() will clone, so memory is separate, but that's not the case with windows, 
		//Taking advantage of shared memory to allow both threads to add info to vectors.
		//////////////////////////////////////////////////////////////////////////////////////////////////////
		
		vector<seqSumData*> pDataArray; 
		DWORD   dwThreadIdArray[processors-1];
		HANDLE  hThreadArray[processors-1]; 
        
		bool hasNameMap = false;
        if ((namefile !="") || (countfile != "")) { hasNameMap = true; }
        
		//Create processor worker threads.
		for( int i=0; i<processors-1; i++ ){
            
            string extension = "";
            if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
			// Allocate memory for thread data.
			seqSumData* tempSum = new seqSumData(filename, (sumFile+extension), m, lines[i]->start, lines[i]->end, hasNameMap, nameMap);
			pDataArray.push_back(tempSum);
			
			//MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
			//default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
			hThreadArray[i] = CreateThread(NULL, 0, MySeqSumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
		}
		
        //do your part
		num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, (sumFile+toString(processors-1)+".temp"), lines[processors-1]);
        processIDS.push_back(processors-1);

		//Wait until all threads have terminated.
		WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
		
		//Close all thread handles and free memory allocations.
		for(int i=0; i < pDataArray.size(); i++){
			num += pDataArray[i]->count;
            if (pDataArray[i]->count != pDataArray[i]->end) {
                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
            }
            for (map<int, long long>::iterator it = pDataArray[i]->startPosition.begin(); it != pDataArray[i]->startPosition.end(); it++)		{
                map<int, long long>::iterator itMain = startPosition.find(it->first);
                if (itMain == startPosition.end()) { //newValue
                    startPosition[it->first] = it->second;
                }else { itMain->second += it->second; } //merge counts
            }
            for (map<int, long long>::iterator it = pDataArray[i]->endPosition.begin(); it != pDataArray[i]->endPosition.end(); it++)		{
                map<int, long long>::iterator itMain = endPosition.find(it->first);
                if (itMain == endPosition.end()) { //newValue
                    endPosition[it->first] = it->second;
                }else { itMain->second += it->second; } //merge counts
            }
            for (map<int, long long>::iterator it = pDataArray[i]->seqLength.begin(); it != pDataArray[i]->seqLength.end(); it++)		{
                map<int, long long>::iterator itMain = seqLength.find(it->first);
                if (itMain == seqLength.end()) { //newValue
                    seqLength[it->first] = it->second;
                }else { itMain->second += it->second; } //merge counts
            }
            for (map<int, long long>::iterator it = pDataArray[i]->ambigBases.begin(); it != pDataArray[i]->ambigBases.end(); it++)		{
                map<int, long long>::iterator itMain = ambigBases.find(it->first);
                if (itMain == ambigBases.end()) { //newValue
                    ambigBases[it->first] = it->second;
                }else { itMain->second += it->second; } //merge counts
            }
            for (map<int, long long>::iterator it = pDataArray[i]->longHomoPolymer.begin(); it != pDataArray[i]->longHomoPolymer.end(); it++)		{
                map<int, long long>::iterator itMain = longHomoPolymer.find(it->first);
                if (itMain == longHomoPolymer.end()) { //newValue
                    longHomoPolymer[it->first] = it->second;
                }else { itMain->second += it->second; } //merge counts
            }
			CloseHandle(hThreadArray[i]);
			delete pDataArray[i];
		}
    
		//append files
		for(int i=0;i<processIDS.size();i++){
			m->appendFiles((sumFile + toString(processIDS[i]) + ".temp"), sumFile);
			m->mothurRemove((sumFile + toString(processIDS[i]) + ".temp"));
		}
#endif		
		return num;
	}
	catch(exception& e) {
		m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
		exit(1);
	}
}
コード例 #6
0
int SeqSummaryCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
        int start = time(NULL);
        
		//set current fasta to fastafile
		m->setFastaFile(fastafile);
		
        map<string, string> variables; 
		variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
		string summaryFile = getOutputFileName("summary",variables);
				
		long long numSeqs = 0;
        long long size = 0;
        long long numUniques = 0;
		map<int, long long> startPosition;
		map<int, long long> endPosition;
		map<int, long long> seqLength;
		map<int, long long> ambigBases;
		map<int, long long> longHomoPolymer;
		
        if (namefile != "") { nameMap = m->readNames(namefile); numUniques = nameMap.size(); }
        else if (countfile != "") {
            CountTable ct;
            ct.readTable(countfile, false, false);
            nameMap = ct.getNameMap();
            size = ct.getNumSeqs();
            numUniques = ct.getNumUniqueSeqs();
        }
		
		if (m->control_pressed) { return 0; }
			

			vector<unsigned long long> positions; 
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				positions = m->divideFile(fastafile, processors);
				for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
			#else
				positions = m->setFilePosFasta(fastafile, numSeqs); 
                if (numSeqs < processors) { processors = numSeqs; }
		
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			#endif
			

			if(processors == 1){
				numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
			}else{
				numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
			}
			
			if (m->control_pressed) {  return 0; }
			
		
        
        //set size
        if (countfile != "") {}//already set
        else if (namefile == "") { size = numSeqs; }
        else { for (map<int, long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++) { size += it->second; } }
        
        if ((namefile != "") || (countfile != "")) {
            string type = "count";
            if (namefile != "") { type = "name"; }
            if (numSeqs != numUniques) { // do fasta and name/count files match
                m->mothurOut("[ERROR]: Your " + type + " file contains " + toString(numUniques) + " unique sequences, but your fasta file contains " + toString(numSeqs) + ". File mismatch detected, quitting command.\n"); m->control_pressed = true;
            }
        }
        
        if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
        
        long long ptile0_25	= 1+(long long)(size * 0.025); //number of sequences at 2.5%
        long long ptile25		= 1+(long long)(size * 0.250); //number of sequences at 25%
        long long ptile50		= 1+(long long)(size * 0.500);
        long long ptile75		= 1+(long long)(size * 0.750);
        long long ptile97_5	= 1+(long long)(size * 0.975);
        long long ptile100	= (long long)(size);
        vector<int> starts; starts.resize(7,0); vector<int> ends; ends.resize(7,0); vector<int> ambigs; ambigs.resize(7,0); vector<int> lengths; lengths.resize(7,0); vector<int> homops; homops.resize(7,0);
        
		//find means
		long long meanStartPosition, meanEndPosition, meanSeqLength, meanAmbigBases, meanLongHomoPolymer;
        meanStartPosition = 0; meanEndPosition = 0; meanSeqLength = 0; meanAmbigBases = 0; meanLongHomoPolymer = 0;
        //minimum
        if ((startPosition.begin())->first == -1) { starts[0] = 0; }
        else {starts[0] = (startPosition.begin())->first; }
        long long totalSoFar = 0;
        //set all values to min
        starts[1] = starts[0]; starts[2] = starts[0]; starts[3] = starts[0]; starts[4] = starts[0]; starts[5] = starts[0];
        int lastValue = 0;
        for (map<int, long long>::iterator it = startPosition.begin(); it != startPosition.end(); it++) {
            int value = it->first; if (value == -1) { value = 0; }
            meanStartPosition += (value*it->second);
            totalSoFar += it->second;
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  starts[1] = value;   } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) { starts[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  starts[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  starts[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  starts[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  starts[6] = value; } //save value
            lastValue = totalSoFar;
        }
        starts[6] = (startPosition.rbegin())->first;
        
        if ((endPosition.begin())->first == -1) { ends[0] = 0; }
        else {ends[0] = (endPosition.begin())->first; }
        totalSoFar = 0;
        //set all values to min
        ends[1] = ends[0]; ends[2] = ends[0]; ends[3] = ends[0]; ends[4] = ends[0]; ends[5] = ends[0];
        lastValue = 0;
        for (map<int, long long>::iterator it = endPosition.begin(); it != endPosition.end(); it++) {
            int value = it->first; if (value == -1) { value = 0; }
            meanEndPosition += (value*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  ends[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) { ends[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  ends[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  ends[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  ends[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {   ends[6] = value; } //save value
            lastValue = totalSoFar;
        }
        ends[6] = (endPosition.rbegin())->first;
        
        if ((seqLength.begin())->first == -1) { lengths[0] = 0; }
        else {lengths[0] = (seqLength.begin())->first; }
        //set all values to min
        lengths[1] = lengths[0]; lengths[2] = lengths[0]; lengths[3] = lengths[0]; lengths[4] = lengths[0]; lengths[5] = lengths[0];
        totalSoFar = 0;
        lastValue = 0;
        for (map<int, long long>::iterator it = seqLength.begin(); it != seqLength.end(); it++) {
            int value = it->first;
            meanSeqLength += (value*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  lengths[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) {   lengths[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  lengths[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  lengths[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  lengths[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  lengths[6] = value; } //save value
            lastValue = totalSoFar;
        }
        lengths[6] = (seqLength.rbegin())->first;
                
        if ((ambigBases.begin())->first == -1) { ambigs[0] = 0; }
        else {ambigs[0] = (ambigBases.begin())->first; }
        //set all values to min
        ambigs[1] = ambigs[0]; ambigs[2] = ambigs[0]; ambigs[3] = ambigs[0]; ambigs[4] = ambigs[0]; ambigs[5] = ambigs[0];
        totalSoFar = 0;
        lastValue = 0;
        for (map<int, long long>::iterator it = ambigBases.begin(); it != ambigBases.end(); it++) {
            int value = it->first;
            meanAmbigBases += (value*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  ambigs[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) {   ambigs[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  ambigs[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  ambigs[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  ambigs[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  ambigs[6] = value; } //save value
            lastValue = totalSoFar;
        }
        ambigs[6] = (ambigBases.rbegin())->first;
        
        
        if ((longHomoPolymer.begin())->first == -1) { homops[0] = 0; }
        else {homops[0] = (longHomoPolymer.begin())->first; }
        //set all values to min
        homops[1] = homops[0]; homops[2] = homops[0]; homops[3] = homops[0]; homops[4] = homops[0]; homops[5] = homops[0];
        totalSoFar = 0;
        lastValue = 0;
        for (map<int, long long>::iterator it = longHomoPolymer.begin(); it != longHomoPolymer.end(); it++) {
            int value = it->first;
            meanLongHomoPolymer += (it->first*it->second);
            totalSoFar += it->second;
            
            if (((totalSoFar <= ptile0_25) && (totalSoFar > 1)) || ((lastValue < ptile0_25) && (totalSoFar > ptile0_25))){  homops[1] = value;  } //save value
            if (((totalSoFar <= ptile25) && (totalSoFar > ptile0_25)) ||  ((lastValue < ptile25) && (totalSoFar > ptile25))) {   homops[2] = value;  } //save value
            if (((totalSoFar <= ptile50) && (totalSoFar > ptile25)) ||  ((lastValue < ptile50) && (totalSoFar > ptile50))) {  homops[3] = value; } //save value
            if (((totalSoFar <= ptile75) && (totalSoFar > ptile50)) ||  ((lastValue < ptile75) && (totalSoFar > ptile75))) {  homops[4] = value; } //save value
            if (((totalSoFar <= ptile97_5) && (totalSoFar > ptile75)) ||  ((lastValue < ptile97_5) && (totalSoFar > ptile97_5))) {  homops[5] = value;  } //save value
            if ((totalSoFar <= ptile100) && (totalSoFar > ptile97_5)) {  homops[6] = value; } //save value
            lastValue = totalSoFar;
        }
        homops[6] = (longHomoPolymer.rbegin())->first;
        		      
        double meanstartPosition, meanendPosition, meanseqLength, meanambigBases, meanlongHomoPolymer;
                
		meanstartPosition = meanStartPosition / (double) size; meanendPosition = meanEndPosition /(double) size; meanlongHomoPolymer = meanLongHomoPolymer / (double) size; meanseqLength = meanSeqLength / (double) size; meanambigBases = meanAmbigBases /(double) size;
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("\t\tStart\tEnd\tNBases\tAmbigs\tPolymer\tNumSeqs"); m->mothurOutEndLine();
		m->mothurOut("Minimum:\t" + toString(starts[0]) + "\t" + toString(ends[0]) + "\t" + toString(lengths[0]) + "\t" + toString(ambigs[0]) + "\t" + toString(homops[0]) + "\t" + toString(1)); m->mothurOutEndLine();
		m->mothurOut("2.5%-tile:\t" + toString(starts[1]) + "\t" + toString(ends[1]) + "\t" + toString(lengths[1]) + "\t" + toString(ambigs[1]) + "\t" + toString(homops[1]) + "\t" + toString(ptile0_25)); m->mothurOutEndLine();
		m->mothurOut("25%-tile:\t" + toString(starts[2]) + "\t" + toString(ends[2]) + "\t" + toString(lengths[2]) + "\t" + toString(ambigs[2]) + "\t" + toString(homops[2]) + "\t" + toString(ptile25)); m->mothurOutEndLine();
		m->mothurOut("Median: \t" + toString(starts[3]) + "\t" + toString(ends[3]) + "\t" + toString(lengths[3]) + "\t" + toString(ambigs[3]) + "\t" + toString(homops[3]) + "\t" + toString(ptile50)); m->mothurOutEndLine();
		m->mothurOut("75%-tile:\t" + toString(starts[4]) + "\t" + toString(ends[4]) + "\t" + toString(lengths[4]) + "\t" + toString(ambigs[4]) + "\t" + toString(homops[4]) + "\t" + toString(ptile75)); m->mothurOutEndLine();
		m->mothurOut("97.5%-tile:\t" + toString(starts[5]) + "\t" + toString(ends[5]) + "\t" + toString(lengths[5]) + "\t" + toString(ambigs[5]) + "\t" + toString(homops[5]) + "\t" + toString(ptile97_5)); m->mothurOutEndLine();
		m->mothurOut("Maximum:\t" + toString(starts[6]) + "\t" + toString(ends[6]) + "\t" + toString(lengths[6]) + "\t" + toString(ambigs[6]) + "\t" + toString(homops[6]) + "\t" + toString(ptile100)); m->mothurOutEndLine();
		m->mothurOut("Mean:\t" + toString(meanstartPosition) + "\t" + toString(meanendPosition) + "\t" + toString(meanseqLength) + "\t" + toString(meanambigBases) + "\t" + toString(meanlongHomoPolymer)); m->mothurOutEndLine();
		if ((namefile == "") && (countfile == "")) {  m->mothurOut("# of Seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); }
		else { m->mothurOut("# of unique seqs:\t" + toString(numSeqs)); m->mothurOutEndLine(); m->mothurOut("total # of seqs:\t" + toString(size)); m->mothurOutEndLine(); }
		
		if (m->control_pressed) {  m->mothurRemove(summaryFile); return 0; }
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(summaryFile); m->mothurOutEndLine();	outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile);
		m->mothurOutEndLine();

        if ((namefile == "") && (countfile == "")) {  m->mothurOut("It took " + toString(time(NULL) - start) + " secs to summarize " + toString(numSeqs) + " sequences.\n");  }
        else{ m->mothurOut("It took " + toString(time(NULL) - start) + " secs to summarize " + toString(size) + " sequences.\n");   }
        
        //set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("summary");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSummaryFile(current); }
		}
        
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SeqSummaryCommand", "execute");
		exit(1);
	}
}