コード例 #1
0
ファイル: aligncommand.cpp プロジェクト: mothur/mothur
int AlignCommand::execute(){
	try {
		if (abort) { if (calledHelp) { return 0; }  return 2;	}

		templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, util.getRandomNumber(), true);
		
        if (m->getControl_pressed()) { outputTypes.clear(); return 0; }
        
        time_t start = time(NULL);
        m->mothurOut("\nAligning sequences from " + fastafile + " ...\n" );
        
        if (outputDir == "") {  outputDir += util.hasPath(fastafile); }
        map<string, string> variables; variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
        string alignFileName = getOutputFileName("fasta", variables);
        string reportFileName = getOutputFileName("alignreport", variables);
        string accnosFileName = getOutputFileName("accnos", variables);
        
        bool hasAccnos = true;
        vector<long long> numFlipped;
        numFlipped.push_back(0); //numflipped because reverse was better
        numFlipped.push_back(0); //total number of sequences with over 50% of bases removed
        
        long long numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, fastafile, numFlipped);
        
        if (m->getControl_pressed()) { util.mothurRemove(accnosFileName); util.mothurRemove(alignFileName); util.mothurRemove(reportFileName); outputTypes.clear();  return 0; }
        
        //delete accnos file if its blank else report to user
        if (util.isBlank(accnosFileName)) {  util.mothurRemove(accnosFileName);  hasAccnos = false; }
        else {
            m->mothurOut("[WARNING]: " + toString(numFlipped[1]) + " of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
            if (!flip) {
                m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well. flip=t");
            }else{  m->mothurOut("\n[NOTE]: " + toString(numFlipped[0]) + " of your sequences were reversed to produce a better alignment.");  }
            m->mothurOutEndLine();
        }
        
        outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName);
        outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName);
        if (hasAccnos)	{	outputNames.push_back(accnosFileName);	outputTypes["accnos"].push_back(accnosFileName);  }
		
        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " seconds to align " + toString(numFastaSeqs) + " sequences.\n");
        
		//set align file as new current fastafile
		string currentFasta = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; current->setFastaFile(currentFasta); }
		}
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i] +"\n"); 	} m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "AlignCommand", "execute");
		exit(1);
	}
}
コード例 #2
0
//**********************************************************************************************************************
int SffMultipleCommand::execute(){
	try {
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		vector<string> sffFiles, oligosFiles;
        readFile(sffFiles, oligosFiles);
        
        string thisOutputDir = outputDir;
        if (thisOutputDir == "") { thisOutputDir = m->hasPath(filename); }
        string fileroot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
        map<string, string> variables; 
		variables["[filename]"] = fileroot;
        string fasta = getOutputFileName("fasta",variables);
        string name = getOutputFileName("name",variables);
        string group = getOutputFileName("group",variables);
        
        if (m->control_pressed) { return 0; }
        
        if (sffFiles.size() < processors) { processors = sffFiles.size(); }
        
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
#else
        //trim.flows, shhh.flows cannot handle multiple processors for windows.
        processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n");
#endif
        if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); }
        else { createProcesses(sffFiles, oligosFiles, fasta, name, group); } 
		
		if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]); 	} return 0; }
		
        if (append) { 
            outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta);
            m->setFastaFile(fasta);
            outputNames.push_back(name); outputTypes["name"].push_back(name);
            m->setNameFile(name);
            if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); }
        }
        
        m->setProcessors(toString(processors));
        
		//report output filenames
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();
        
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "SffMultipleCommand", "execute");
		exit(1);
	}
}
コード例 #3
0
ファイル: rarefact.cpp プロジェクト: EdwardMoseley/mothur
int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){
	try {
		RarefactionCurveData* rcd = new RarefactionCurveData();
		for(int i=0;i<displays.size();i++){
			rcd->registerDisplay(displays[i]);
		}
		
		//convert freq percentage to number
		int increment = 1;
		if (percentFreq < 1.0) {  increment = numSeqs * percentFreq;  }
		else { increment = percentFreq;  }	
		
		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				if(processors == 1){
					driver(rcd, increment, nIters);	
				}else{
					vector<int> procIters;
					
					int numItersPerProcessor = nIters / processors;
					
					//divide iters between processes
					for (int i = 0; i < processors; i++) {
						if(i == processors - 1){
							numItersPerProcessor = nIters - i * numItersPerProcessor;
						}
						procIters.push_back(numItersPerProcessor);
					}
					
					createProcesses(procIters, rcd, increment, nIters);
				}

		#else
			driver(rcd, increment, nIters);	
		#endif

		for(int i=0;i<displays.size();i++){
			displays[i]->close();
		}
		
		delete rcd;
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "Rarefact", "getCurve");
		exit(1);
	}
}
コード例 #4
0
ファイル: weighted.cpp プロジェクト: jonls/mothur
EstOutput Weighted::getValues(Tree* t, int p, string o) {
    try {
		data.clear(); //clear out old values
		int numGroups;
		vector<double> D;
		processors = p;
		outputDir = o;
        
        CountTable* ct = t->getCountTable();
		
		numGroups = m->getNumGroups();
		
		if (m->control_pressed) { return data; }
		
		//calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
		vector< vector<string> > namesOfGroupCombos;
		for (int i=0; i<numGroups; i++) { 
			for (int l = 0; l < i; l++) {	
				//initialize weighted scores
				//WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0;
				vector<string> groups; groups.push_back((m->getGroups())[i]); groups.push_back((m->getGroups())[l]);
				namesOfGroupCombos.push_back(groups);
			}
		}
		
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }
        
        data = createProcesses(t, namesOfGroupCombos, ct);
        
        lines.clear();

		return data;
	}
	catch(exception& e) {
		m->errorOut(e, "Weighted", "getValues");
		exit(1);
	}
}
コード例 #5
0
ファイル: degapseqscommand.cpp プロジェクト: mothur/mothur
//***************************************************************************************************************
int DegapSeqsCommand::execute(){
	try{
		
		if (abort) { if (calledHelp) { return 0; }  return 2;	}

        m->mothurOut("Degapping sequences from " + fastafile + " ...\n" );
        
        string tempOutputDir = outputDir;
        if (outputDir == "") { tempOutputDir = util.hasPath(fastafile); }
        map<string, string> variables;
        variables["[filename]"] = tempOutputDir + util.getRootName(util.getSimpleName(fastafile));
        string degapFile = getOutputFileName("fasta", variables);
        outputNames.push_back(degapFile); outputTypes["fasta"].push_back(degapFile);
        
        long start = time(NULL);
        
        int numSeqs = createProcesses(fastafile, degapFile);
        
        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to degap " + toString(numSeqs) + " sequences.\n\n");
        
        if (m->getControl_pressed()) {  for (int j = 0; j < outputNames.size(); j++) {	util.mothurRemove(outputNames[j]);	} return 0; }
		
		//set fasta file as new current fastafile
		string currentName = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setFastaFile(currentName); }
		}
		
		m->mothurOut("\nOutput File Names: \n"); 
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}	
		m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "DegapSeqsCommand", "execute");
		exit(1);
	}
}
コード例 #6
0
ファイル: treegroupscommand.cpp プロジェクト: mothur/mothur
int TreeGroupCommand::makeSimsShared(InputData& input, SharedRAbundVectors*& lookup, CountTable& ct) {
	try {
        
        if (subsample) { 
            if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
                subsampleSize = lookup->getNumSeqsSmallestGroup();
            }else {
                lookup->removeGroups(subsampleSize);
                Groups = lookup->getNamesGroups();
                Treenames = Groups;
            }
            
            if (lookup->size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command.\n");  m->setControl_pressed(true); return 0; }
        }
        numGroups = lookup->size();
        
		set<string> processedLabels;
		set<string> userLabels = labels;
		
		//as long as you are not at the end of the file or done wih the lines you want
		while((lookup != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            if (m->getControl_pressed()) { delete lookup;  return 1; }
		
			if(allLines == 1 || labels.count(lookup->getLabel()) == 1){
				m->mothurOut(lookup->getLabel()+"\n");
				createProcesses(lookup, ct);
				
				processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
			}
			
			if ((util.anyLabelsToProcess(lookup->getLabel(), userLabels, "") ) && (processedLabels.count(lastLabel) != 1)) {
				string saveLabel = lookup->getLabel();
			
				delete lookup;
				lookup = input.getSharedRAbundVectors(lastLabel);

				m->mothurOut(lookup->getLabel()+"\n");
				createProcesses(lookup, ct);
					
				processedLabels.insert(lookup->getLabel()); userLabels.erase(lookup->getLabel());
				
				//restore real lastlabel to save below
				lookup->setLabels(saveLabel);
			}

			lastLabel = lookup->getLabel();
			
			//get next line to process
			delete lookup;
			lookup = input.getSharedRAbundVectors();
		}
		
		if (m->getControl_pressed()) {  return 1; }

		//output error messages about any remaining user labels
		set<string>::iterator it;
		bool needToRun = false;
		for (it = userLabels.begin(); it != userLabels.end(); it++) {  
			m->mothurOut("Your file does not include the label " + *it); 
			if (processedLabels.count(lastLabel) != 1) {
				m->mothurOut(". I will use " + lastLabel + ".\n");
				needToRun = true;
			}else { m->mothurOut(". Please refer to " + lastLabel + ".\n");  }
		}
		
		//run last label if you need to
		if (needToRun )  {
			delete lookup;
			lookup = input.getSharedRAbundVectors(lastLabel);

			m->mothurOut(lookup->getLabel()+"\n");
			createProcesses(lookup, ct);
			delete lookup;
		}

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "TreeGroupCommand", "makeSimsShared");
		exit(1);
	}
}
コード例 #7
0
int ChimeraCheckCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		for (int i = 0; i < fastaFileNames.size(); i++) {
				
			m->mothurOut("Checking sequences from " + fastaFileNames[i] + " ..." ); m->mothurOutEndLine();
			
			int start = time(NULL);	
			
			string thisNameFile = "";
			if (nameFileNames.size() != 0) { thisNameFile = nameFileNames[i]; }
			
			chimera = new ChimeraCheckRDP(fastaFileNames[i], templatefile, thisNameFile, svg, increment, ksize, outputDir);			

			if (m->control_pressed) { delete chimera;	return 0;	}
			
			if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[i]);  }//if user entered a file with a path then preserve it
            map<string, string> variables;
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i]));
			string outputFileName = getOutputFileName("chimera", variables);
			outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
			
		#ifdef USE_MPI
		
				int pid, numSeqsPerProcessor; 
				int tag = 2001;
				vector<unsigned long long> MPIPos;
				
				MPI_Status status; 
				MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
				MPI_Comm_size(MPI_COMM_WORLD, &processors); 

				MPI_File inMPI;
				MPI_File outMPI;
							
				int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
				int inMode=MPI_MODE_RDONLY; 
							
				char outFilename[1024];
				strcpy(outFilename, outputFileName.c_str());
			
				char inFileName[1024];
				strcpy(inFileName, fastaFileNames[i].c_str());

				MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
				MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
				
				if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} outputTypes.clear(); delete chimera; return 0;  }
				
				if (pid == 0) { //you are the root process 
					MPIPos = m->setFilePosFasta(fastaFileNames[i], numSeqs); //fills MPIPos, returns numSeqs
					
					//send file positions to all processes
					for(int j = 1; j < processors; j++) { 
						MPI_Send(&numSeqs, 1, MPI_INT, j, tag, MPI_COMM_WORLD);
						MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, j, tag, MPI_COMM_WORLD);
					}	
					
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
					
				
					//align your part
					driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
					
					if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}   outputTypes.clear(); delete chimera; return 0;  }
					
					//wait on chidren
					for(int j = 1; j < processors; j++) { 
						char buf[5];
						MPI_Recv(buf, 5, MPI_CHAR, j, tag, MPI_COMM_WORLD, &status); 
					}
				}else{ //you are a child process
					MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
					MPIPos.resize(numSeqs+1);
					MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
					
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
					
					//align your part
					driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
					
					if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  outputTypes.clear(); delete chimera; return 0;  }
					
					//tell parent you are done.
					char buf[5];
					strcpy(buf, "done"); 
					MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
				}
				
				//close files 
				MPI_File_close(&inMPI);
				MPI_File_close(&outMPI);
				MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
		#else
			
			
			
			//break up file
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				vector<unsigned long long> positions = m->divideFile(fastaFileNames[i], processors);
			
				for (int s = 0; s < (positions.size()-1); s++) {
					lines.push_back(new linePair(positions[s], positions[(s+1)]));
				}	
			
				if(processors == 1){
					numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
					
					if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} for (int j = 0; j < lines.size(); j++) {  delete lines[j];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
									
				}else{
					processIDS.resize(0);
					
					numSeqs = createProcesses(outputFileName, fastaFileNames[i]); 
				
					rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
						
					//append output files
					for(int j=1;j<processors;j++){
						m->appendFiles((outputFileName + toString(processIDS[j]) + ".temp"), outputFileName);
						m->mothurRemove((outputFileName + toString(processIDS[j]) + ".temp"));
					}
					
					if (m->control_pressed) { 
						for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} outputTypes.clear();
						for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear();
						delete chimera;
						return 0;
					}
				}

			#else
				lines.push_back(new linePair(0, 1000));
				numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
				
				if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear(); for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} outputTypes.clear(); delete chimera; return 0; }
			#endif
		#endif		
			delete chimera;
			for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear();
			
			m->mothurOutEndLine(); m->mothurOut("This method does not determine if a sequence is chimeric, but allows you to make that determination based on the IS values."); m->mothurOutEndLine(); 
			m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");	m->mothurOutEndLine(); m->mothurOutEndLine();

		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}	
		m->mothurOutEndLine();
	
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraCheckCommand", "execute");
		exit(1);
	}
}
コード例 #8
0
ファイル: bellerophon.cpp プロジェクト: azmfaridee/mothur
//***************************************************************************************************************
int Bellerophon::getChimeras() {
	try {
		
		//create breaking points
		vector<int> midpoints;   midpoints.resize(iters, window);
		for (int i = 1; i < iters; i++) {  midpoints[i] = midpoints[i-1] + increment;  }
	
	#ifdef USE_MPI
		int pid, numSeqsPerProcessor; 
	
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
		MPI_Comm_size(MPI_COMM_WORLD, &processors); 
		
		numSeqsPerProcessor = iters / processors;
		
		//each process hits this only once
		unsigned long long startPos = pid * numSeqsPerProcessor;
		if(pid == processors - 1){
				numSeqsPerProcessor = iters - pid * numSeqsPerProcessor;
		}
		lines.push_back(linePair(startPos, numSeqsPerProcessor));
		
		//fill pref with scores
		driverChimeras(midpoints, lines[0]);
		
		if (m->control_pressed) { return 0; }
				
		//each process must send its parts back to pid 0
		if (pid == 0) {
			
			//receive results 
			for (int j = 1; j < processors; j++) {
				
				vector<string>  MPIBestSend; 
				for (int i = 0; i < numSeqs; i++) {
				
					if (m->control_pressed) { return 0; }

					MPI_Status status;
					//receive string
					int length;
					MPI_Recv(&length, 1, MPI_INT, j, 2001, MPI_COMM_WORLD, &status);
					
					char* buf = new char[length];
					MPI_Recv(&buf[0], length, MPI_CHAR, j, 2001, MPI_COMM_WORLD, &status);
					
					string temp = buf;
					if (temp.length() > length) { temp = temp.substr(0, length); }
					delete buf;

					MPIBestSend.push_back(temp);
				}
				
				fillPref(j, MPIBestSend);
				
				if (m->control_pressed) { return 0; }
			}

		}else {
			//takes best window for each sequence and turns Preference to string that can be parsed by pid 0.
			//played with this a bit, but it may be better to try user-defined datatypes with set string lengths??
			vector<string> MPIBestSend = getBestWindow(lines[0]);
			pref.clear();
				
			//send your result to parent
			for (int i = 0; i < numSeqs; i++) {
				
				if (m->control_pressed) { return 0; }
				
				int bestLength = MPIBestSend[i].length();
				char* buf = new char[bestLength];
				memcpy(buf, MPIBestSend[i].c_str(), bestLength);
				
				MPI_Send(&bestLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD);
				MPI_Send(buf, bestLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD);
				delete buf;
			}
			
			MPIBestSend.clear();
		}
		MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
	#else
	
		//divide breakpoints between processors
		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
			if(processors == 1){ 
				lines.push_back(linePair(0, iters));	
				
				//fill pref with scores
				driverChimeras(midpoints, lines[0]);
	
			}else{
			
				int numSeqsPerProcessor = iters / processors;
				
				for (int i = 0; i < processors; i++) {
					unsigned long long startPos = i * numSeqsPerProcessor;
					if(i == processors - 1){
						numSeqsPerProcessor = iters - i * numSeqsPerProcessor;
					}
					lines.push_back(linePair(startPos, numSeqsPerProcessor));
				}
				
				createProcesses(midpoints);
			}
		#else
			lines.push_back(linePair(0, iters));	
			
			///fill pref with scores
			driverChimeras(midpoints, lines[0]);
		#endif
	
	#endif
	
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "Bellerophon", "getChimeras");
		exit(1);
	}
}
コード例 #9
0
ファイル: parsimony.cpp プロジェクト: jonls/mothur
EstOutput Parsimony::getValues(Tree* t, int p, string o) {
	try {
		processors = p;
		outputDir = o;
        CountTable* ct = t->getCountTable();
		
		//if the users enters no groups then give them the score of all groups
		vector<string> mGroups = m->getGroups();
		int numGroups = mGroups.size();
		
		//calculate number of comparsions
		int numComp = 0;
		vector< vector<string> > namesOfGroupCombos;
		for (int r=0; r<numGroups; r++) { 
			for (int l = 0; l < r; l++) {
				numComp++;
				vector<string> groups; groups.push_back(mGroups[r]); groups.push_back(mGroups[l]);
				//cout << globaldata->Groups[r] << '\t' << globaldata->Groups[l] << endl;
				namesOfGroupCombos.push_back(groups);
			}
		}

		//numComp+1 for AB, AC, BC, ABC
		if (numComp != 1) {
			vector<string> groups;
			if (numGroups == 0) {
				//get score for all users groups
				vector<string> tGroups = ct->getNamesOfGroups();
				for (int i = 0; i < tGroups.size(); i++) {
					if (tGroups[i] != "xxx") {
						groups.push_back(tGroups[i]);
						//cout << tmap->namesOfGroups[i] << endl;
					}
				}
				namesOfGroupCombos.push_back(groups);
			}else {
				for (int i = 0; i < mGroups.size(); i++) {
					groups.push_back(mGroups[i]);
					//cout << globaldata->Groups[i] << endl;
				}
				namesOfGroupCombos.push_back(groups);
			}
		}
        
        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }
        
        data = createProcesses(t, namesOfGroupCombos, ct);
		
		return data;
		
	}
	catch(exception& e) {
		m->errorOut(e, "Parsimony", "getValues");
		exit(1);
	}
}
コード例 #10
0
int GetMetaCommunityCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
        
        InputData input(sharedfile, "sharedfile");
        vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
        string lastLabel = lookup[0]->getLabel();
        
        //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
        set<string> processedLabels;
        set<string> userLabels = labels;
        
        //as long as you are not at the end of the file or done wih the lines you want
        while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
            
            if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
            
            if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
                
                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                
                createProcesses(lookup);
                
                processedLabels.insert(lookup[0]->getLabel());
                userLabels.erase(lookup[0]->getLabel());
            }
            
            if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
                string saveLabel = lookup[0]->getLabel();
                
                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
                lookup = input.getSharedRAbundVectors(lastLabel);
                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
                
                createProcesses(lookup);
                
                processedLabels.insert(lookup[0]->getLabel());
                userLabels.erase(lookup[0]->getLabel());
                
                //restore real lastlabel to save below
                lookup[0]->setLabel(saveLabel);
            }
            
            lastLabel = lookup[0]->getLabel();
            //prevent memory leak
            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
            
            if (m->control_pressed) { return 0; }
            
            //get next line to process
            lookup = input.getSharedRAbundVectors();
        }
        
        if (m->control_pressed) {  return 0; }
        
        //output error messages about any remaining user labels
        set<string>::iterator it;
        bool needToRun = false;
        for (it = userLabels.begin(); it != userLabels.end(); it++) {
            m->mothurOut("Your file does not include the label " + *it);
            if (processedLabels.count(lastLabel) != 1) {
                m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
                needToRun = true;
            }else {
                m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
            }
        }
        
        //run last label if you need to
        if (needToRun == true)  {
            for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
            lookup = input.getSharedRAbundVectors(lastLabel);
            
            m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
            
            createProcesses(lookup);
            
            for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
        }
		
        //output files created by command
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();
        return 0;
		
    }
	catch(exception& e) {
		m->errorOut(e, "GetMetaCommunityCommand", "execute");
		exit(1);
	}
}
コード例 #11
0
ファイル: aligncommand.cpp プロジェクト: Cryomics-Lab/mothur
int AlignCommand::execute(){
	try {
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}

		templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, m->getRandomNumber(), true);
		
		for (int s = 0; s < candidateFileNames.size(); s++) {
			if (m->control_pressed) { outputTypes.clear(); return 0; }
			
			m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine();
			
			if (outputDir == "") {  outputDir += m->hasPath(candidateFileNames[s]); }
            map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s]));
			string alignFileName = getOutputFileName("fasta", variables);  
			if(align == "noalign") {alignFileName="/dev/null";}
			string reportFileName = getOutputFileName("alignreport", variables);
			string accnosFileName = getOutputFileName("accnos", variables);
            
			bool hasAccnos = true;
			
			int numFastaSeqs = 0;
			for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
			int start = time(NULL);
		

			vector<unsigned long long> positions; 
		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
			positions = m->divideFile(candidateFileNames[s], processors);
			for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
		#else
			if (processors == 1) {
				lines.push_back(new linePair(0, 1000));
			}else {
				positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); 
				if (numFastaSeqs < processors) { processors = numFastaSeqs; }
                
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numFastaSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			}
		#endif
			
			if(processors == 1){
				numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
			}else{
				numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); 
			}
				
			if (m->control_pressed) { m->mothurRemove(accnosFileName); m->mothurRemove(alignFileName); m->mothurRemove(reportFileName); outputTypes.clear();  return 0; }
			
			//delete accnos file if its blank else report to user
			if (m->isBlank(accnosFileName)) {  m->mothurRemove(accnosFileName);  hasAccnos = false; }
			else { 
				m->mothurOut("[WARNING]: Some of your sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
				if (!flip) {
					m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
				}else{  m->mothurOut(" If the reverse compliment proved to be better it was reported.");  }
				m->mothurOutEndLine();
			}

			outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName);
			outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName);
			if (hasAccnos)	{	outputNames.push_back(accnosFileName);	outputTypes["accnos"].push_back(accnosFileName);  }

			m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
			m->mothurOutEndLine();
			m->mothurOutEndLine();
		}
		
		//set align file as new current fastafile
		string currentFasta = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "AlignCommand", "execute");
		exit(1);
	}
}
コード例 #12
0
int DistanceCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		int startTime = time(NULL);
		
		//save number of new sequence
		numNewFasta = alignDB.getNumSeqs();
		
		//sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB
		if ((oldfastafile != "") && (column != ""))  {	if (!(sanityCheck())) { return 0; }  }
		
		if (m->control_pressed) { return 0; }
		
		int numSeqs = alignDB.getNumSeqs();
		cutoff += 0.005;
		
		if (!alignDB.sameLength()) {  m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; }
		
		string outputFile;
        
        map<string, string> variables; 
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
		if (output == "lt") { //does the user want lower triangle phylip formatted file 
            variables["[outputtag]"] = "phylip";
			outputFile = getOutputFileName("phylip", variables);
			m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile);
			
			//output numSeqs to phylip formatted dist file
		}else if (output == "column") { //user wants column format
			outputFile = getOutputFileName("column", variables);
			outputTypes["column"].push_back(outputFile);
			
			//so we don't accidentally overwrite
			if (outputFile == column) { 
				string tempcolumn = column + ".old"; 
				rename(column.c_str(), tempcolumn.c_str());
			}
			
			m->mothurRemove(outputFile);
		}else { //assume square
			variables["[outputtag]"] = "square";
			outputFile = getOutputFileName("phylip", variables);
			m->mothurRemove(outputFile);
			outputTypes["phylip"].push_back(outputFile);
		}
		

#ifdef USE_MPI
		
		int pid, start, end; 
		int tag = 2001;
				
		MPI_Status status; 
		MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
		
		//each process gets where it should start and stop in the file
		if (output != "square") {
			start = int (sqrt(float(pid)/float(processors)) * numSeqs);
			end = int (sqrt(float(pid+1)/float(processors)) * numSeqs);
		}else{
			start = int ((float(pid)/float(processors)) * numSeqs);
			end = int ((float(pid+1)/float(processors)) * numSeqs);
		}
		
		if (output == "column") {
			MPI_File outMPI;
			int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 

			//char* filename = new char[outputFile.length()];
			//memcpy(filename, outputFile.c_str(), outputFile.length());
			
			char filename[1024];
			strcpy(filename, outputFile.c_str());
			
			MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI);
			//delete filename;

			if (pid == 0) { //you are the root process 
				
				//do your part
				string outputMyPart;
				
				driverMPI(start, end, outMPI, cutoff); 
				
				if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI);   return 0; }
			
				//wait on chidren
				for(int i = 1; i < processors; i++) { 
					if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);    return 0; }
					
					char buf[5];
					MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
				}
			}else { //you are a child process
				//do your part
				driverMPI(start, end, outMPI, cutoff); 
				
				if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
			
				char buf[5];
				strcpy(buf, "done"); 
				//tell parent you are done.
				MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
			}
			
			MPI_File_close(&outMPI);
			
		}else { //lower triangle format
			if (pid == 0) { //you are the root process 
			
				//do your part
				string outputMyPart;
				unsigned long long mySize;
				
				if (output != "square"){ driverMPI(start, end, outputFile, mySize); }
				else { driverMPI(start, end, outputFile, mySize, output); }
	
				if (m->control_pressed) {  outputTypes.clear();   return 0; }
				
				int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; //
				MPI_File outMPI;
				MPI_File inMPI;

				//char* filename = new char[outputFile.length()];
				//memcpy(filename, outputFile.c_str(), outputFile.length());
				
				char filename[1024];
				strcpy(filename, outputFile.c_str());

				MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI);
				//delete filename;

				//wait on chidren
				for(int b = 1; b < processors; b++) { 
					unsigned long long fileSize;
					
					if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&outMPI);   return 0; }
					
					MPI_Recv(&fileSize, 1, MPI_LONG, b, tag, MPI_COMM_WORLD, &status); 
					
					string outTemp = outputFile + toString(b) + ".temp";

					char* buf = new char[outTemp.length()];
					memcpy(buf, outTemp.c_str(), outTemp.length());
					
					MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);
					delete buf;

					int count = 0;
					while (count < fileSize) { 
						char buf2[1];
						MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status);
						MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status);
						count += 1;
					}
					
					MPI_File_close(&inMPI); //deleted on close
				}
				
				MPI_File_close(&outMPI);
			}else { //you are a child process
				//do your part
				unsigned long long size;
				if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); }
				else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); }
				
				if (m->control_pressed) {  return 0; }
			
				//tell parent you are done.
				MPI_Send(&size, 1, MPI_LONG, 0, tag, MPI_COMM_WORLD);
			}
		}
        
        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
        
#else
				
	//#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
		//if you don't need to fork anything
		if(processors == 1){
			if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
			else { driver(0, numSeqs, outputFile, "square");  }
		}else{ //you have multiple processors
			createProcesses(outputFile, numSeqs);
		}
	//#else
		//ifstream inFASTA;
		//if (output != "square") {  driver(0, numSeqs, outputFile, cutoff); }
		//else { driver(0, numSeqs, outputFile, "square");  }
	//#endif
	
#endif
		if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
		
		#ifdef USE_MPI
			MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
					
			if (pid == 0) { //only one process should output to screen
		#endif
		
		//if (output == "square") {  convertMatrix(outputFile); }
		
		ifstream fileHandle;
		fileHandle.open(outputFile.c_str());
		if(fileHandle) {
			m->gobble(fileHandle);
			if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank. This can result if there are no distances below your cutoff.");  m->mothurOutEndLine(); }
		}
		
		//append the old column file to the new one
		if ((oldfastafile != "") && (column != ""))  {
			//we had to rename the column file so we didnt overwrite above, but we want to keep old name
			if (outputFile == column) { 
				string tempcolumn = column + ".old";
				m->appendFiles(tempcolumn, outputFile);
				m->mothurRemove(tempcolumn);
			}else{
				m->appendFiles(outputFile, column);
				m->mothurRemove(outputFile);
				outputFile = column;
			}
			
			if (outputDir != "") { 
				string newOutputName = outputDir + m->getSimpleName(outputFile);
				rename(outputFile.c_str(), newOutputName.c_str());
				m->mothurRemove(outputFile);
				outputFile = newOutputName;
			}
		}

		
		#ifdef USE_MPI
			}
		#endif
		
		if (m->control_pressed) { outputTypes.clear();  m->mothurRemove(outputFile); return 0; }
		
		//set phylip file as new current phylipfile
		string current = "";
		itTypes = outputTypes.find("phylip");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); }
		}
		
		//set column file as new current columnfile
		itTypes = outputTypes.find("column");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		m->mothurOut(outputFile); m->mothurOutEndLine();
		m->mothurOutEndLine();
		m->mothurOut("It took " + toString(time(NULL) - startTime) + " seconds to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();


		if (m->isTrue(compress)) {
			m->mothurOut("Compressing..."); m->mothurOutEndLine();
			m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine();
			system(("gzip -v " + outputFile).c_str());
			outputNames.push_back(outputFile + ".gz");
		}else { outputNames.push_back(outputFile); }

		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "DistanceCommand", "execute");
		exit(1);
	}
}
コード例 #13
0
int ClassifySeqsCommand::execute(){
	try {
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
        
        string outputMethodTag = method;
		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	}
		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());				}
        else if(method == "zap"){	
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
        }
		else {
			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
			m->mothurOutEndLine();
			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);	
		}
		
		if (m->control_pressed) { delete classify; return 0; }
				
		for (int s = 0; s < fastaFileNames.size(); s++) {
		
			m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
			
			string baseTName = m->getSimpleName(taxonomyFileName);
			
            //set rippedTaxName to 
			string RippedTaxName = "";
            bool foundDot = false;
            for (int i = baseTName.length()-1; i >= 0; i--) {
                if (foundDot && (baseTName[i] != '.')) {  RippedTaxName = baseTName[i] + RippedTaxName; }
                else if (foundDot && (baseTName[i] == '.')) {  break; }
                else if (!foundDot && (baseTName[i] == '.')) {  foundDot = true; }
            }
            //if (RippedTaxName != "") { RippedTaxName +=  "."; }   
          
			if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
            map<string, string> variables; 
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
            variables["[tag]"] = RippedTaxName;
            variables["[tag2]"] = outputMethodTag;
			string newTaxonomyFile = getOutputFileName("taxonomy", variables);
			string newaccnosFile = getOutputFileName("accnos", variables);
			string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
			string taxSummary = getOutputFileName("taxsummary", variables);
			
			if ((method == "knn") && (search == "distance")) { 
				string DistName = getOutputFileName("matchdist", variables);
				classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
			}
			
			outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
			outputNames.push_back(taxSummary);	outputTypes["taxsummary"].push_back(taxSummary);
			
			int start = time(NULL);
			int numFastaSeqs = 0;
			for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
		
			vector<unsigned long long> positions; 
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
			positions = m->divideFile(fastaFileNames[s], processors);
			for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
#else
			if (processors == 1) {
				lines.push_back(new linePair(0, 1000));
			}else {
				positions = m->setFilePosFasta(fastaFileNames[s], numFastaSeqs); 
                if (numFastaSeqs < processors) { processors = numFastaSeqs; }
				
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numFastaSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			}
#endif
			if(processors == 1){
				numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]);
			}else{
				numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); 
			}
			
			if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); 
                outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
            }else { m->mothurRemove(newaccnosFile); }

            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
            start = time(NULL);
            
            //read namefile
            if(namefile != "") {
                
                m->mothurOut("Reading " + namefileNames[s] + "..."); cout.flush();
                nameMap.clear(); //remove old names
                m->readNames(namefileNames[s], nameMap);
                m->mothurOut("  Done."); m->mothurOutEndLine();
            }
            
            //output taxonomy with the unclassified bins added
            ifstream inTax;
            m->openInputFile(newTaxonomyFile, inTax);
            
            ofstream outTax;
            string unclass = newTaxonomyFile + ".unclass.temp";
            m->openOutputFile(unclass, outTax);
            
            //get maxLevel from phylotree so you know how many 'unclassified's to add
            int maxLevel = classify->getMaxLevel();
            
            //read taxfile - this reading and rewriting is done to preserve the confidence scores.
            string name, taxon;
            string group = "";
            GroupMap* groupMap = NULL;
            CountTable* ct = NULL;
            PhyloSummary* taxaSum;
            
            if (hasCount) {
                ct = new CountTable();
                ct->readTable(countfileNames[s], true, false);
                taxaSum = new PhyloSummary(ct, relabund, printlevel);
            }else {
                if (groupfile != "") {  group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
                taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
            }
            
            while (!inTax.eof()) {
                if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
                
                inTax >> name >> taxon; m->gobble(inTax);
                
                string newTax = m->addUnclassifieds(taxon, maxLevel, probs);
                
                outTax << name << '\t' << newTax << endl;
                
                if (namefile != "") {
                    itNames = nameMap.find(name);
                    
                    if (itNames == nameMap.end()) {
                        m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
                    }else{
                        for (int i = 0; i < itNames->second.size(); i++) {
                            taxaSum->addSeqToTree(itNames->second[i], newTax);  //add it as many times as there are identical seqs
                        }
                        itNames->second.clear();
                        nameMap.erase(itNames->first);
                    }
                }else {
                    taxaSum->addSeqToTree(name, newTax);
                }
            }
            inTax.close();
            outTax.close();
            
            m->mothurRemove(newTaxonomyFile);
            rename(unclass.c_str(), newTaxonomyFile.c_str());
            
            if (m->control_pressed) {  outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {	m->mothurRemove(outputNames[i]);	} delete classify; return 0; }
            
            //print summary file
            ofstream outTaxTree;
            m->openOutputFile(taxSummary, outTaxTree);
            taxaSum->print(outTaxTree, output);
            outTaxTree.close();
            
            if (ct != NULL) { delete ct; }
            if (groupMap != NULL) { delete groupMap; } delete taxaSum;
            m->mothurRemove(tempTaxonomyFile);
            
            m->mothurOutEndLine();
            m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
			
		}
        delete classify;
        
        m->mothurOutEndLine();
        m->mothurOut("Output File Names: "); m->mothurOutEndLine();
        for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
        m->mothurOutEndLine();
		
		//set taxonomy file as new current taxonomyfile
		string current = "";
		itTypes = outputTypes.find("taxonomy");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
		}
		
		current = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
		}
		
		
		
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClassifySeqsCommand", "execute");
		exit(1);
	}
}
コード例 #14
0
ファイル: unweighted.cpp プロジェクト: Cryomics-Lab/mothur
EstOutput Unweighted::getValues(Tree* t, int p, string o) {
	try {
		processors = p;
		outputDir = o;
        
        CountTable* ct = t->getCountTable();
        
		//if the users enters no groups then give them the score of all groups
		int numGroups = m->getNumGroups();
		
		//calculate number of comparsions
		int numComp = 0;
		vector< vector<string> > namesOfGroupCombos;
		for (int r=0; r<numGroups; r++) { 
			for (int l = 0; l < r; l++) {
				numComp++;
				vector<string> groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]);
				namesOfGroupCombos.push_back(groups);
			}
		}
		
		if (numComp != 1) {
			vector<string> groups;
			if (numGroups == 0) {
				//get score for all users groups
				for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
					if ((ct->getNamesOfGroups())[i] != "xxx") {
						groups.push_back((ct->getNamesOfGroups())[i]);
					}
				}
				namesOfGroupCombos.push_back(groups);
			}else {
				for (int i = 0; i < m->getNumGroups(); i++) {
					groups.push_back((m->getGroups())[i]);
				}
				namesOfGroupCombos.push_back(groups);
			}
		}
        
        lines.clear();
        int remainingPairs = namesOfGroupCombos.size();
        int startIndex = 0;
        for (int remainingProcessors = processors; remainingProcessors > 0; remainingProcessors--) {
            int numPairs = remainingPairs; //case for last processor
            if (remainingProcessors != 1) { numPairs = ceil(remainingPairs / remainingProcessors); }
            lines.push_back(linePair(startIndex, numPairs)); //startIndex, numPairs
            startIndex = startIndex + numPairs;
            remainingPairs = remainingPairs - numPairs;
        }
        
        data = createProcesses(t, namesOfGroupCombos, ct);

        lines.clear();
        
		return data;
	}
	catch(exception& e) {
		m->errorOut(e, "Unweighted", "getValues");
		exit(1);
	}
}
コード例 #15
0
ファイル: classifyseqscommand.cpp プロジェクト: mothur/mothur
int ClassifySeqsCommand::execute(){
	try {
		if (abort) { if (calledHelp) { return 0; }  return 2;	}
        
        string outputMethodTag = method;
		if(method == "wang"){	classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion());	}
		else if(method == "knn"){	classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, util.getRandomNumber(), current->getVersion());				}
        else if(method == "zap"){	
            outputMethodTag = search + "_" + outputMethodTag;
            if (search == "kmer") {   classify = new KmerTree(templateFileName, taxonomyFileName, kmerSize, cutoff); }
            else {  classify = new AlignTree(templateFileName, taxonomyFileName, cutoff);  }
        }
		else {
			m->mothurOut(search + " is not a valid method option. I will run the command using wang.");
			m->mothurOutEndLine();
			classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, util.getRandomNumber(), flip, writeShortcuts, current->getVersion());
		}
		
		if (m->getControl_pressed()) { delete classify; return 0; }
				
        m->mothurOut("Classifying sequences from " + fastafile + " ...\n" );
        
        string baseTName = util.getSimpleName(taxonomyFileName);
        
        //set rippedTaxName to
        string RippedTaxName = "";
        bool foundDot = false;
        for (int i = baseTName.length()-1; i >= 0; i--) {
            if (foundDot && (baseTName[i] != '.')) {  RippedTaxName = baseTName[i] + RippedTaxName; }
            else if (foundDot && (baseTName[i] == '.')) {  break; }
            else if (!foundDot && (baseTName[i] == '.')) {  foundDot = true; }
        }
        
        if (outputDir == "") { outputDir += util.hasPath(fastafile); }
        map<string, string> variables;
        variables["[filename]"] = outputDir + util.getRootName(util.getSimpleName(fastafile));
        variables["[tag]"] = RippedTaxName;
        variables["[tag2]"] = outputMethodTag;
        string newTaxonomyFile = getOutputFileName("taxonomy", variables);
        string newaccnosFile = getOutputFileName("accnos", variables);
        string tempTaxonomyFile = outputDir + util.getRootName(util.getSimpleName(fastafile)) + "taxonomy.temp";
        string taxSummary = getOutputFileName("taxsummary", variables);
        
        if ((method == "knn") && (search == "distance")) {
            string DistName = getOutputFileName("matchdist", variables);
            classify->setDistName(DistName);  outputNames.push_back(DistName); outputTypes["matchdist"].push_back(DistName);
        }
        
        outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
        outputNames.push_back(taxSummary);	outputTypes["taxsummary"].push_back(taxSummary);
        
        long start = time(NULL);
        int numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastafile);
        
        if (!util.isBlank(newaccnosFile)) { m->mothurOut("\n[WARNING]: mothur reversed some your sequences for a better classification.  If you would like to take a closer look, please check " + newaccnosFile + " for the list of the sequences.\n");
            outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
        }else { util.mothurRemove(newaccnosFile); }
        
        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences.\n\n");
        start = time(NULL);
        
        //read namefile
        map<string, vector<string> > nameMap;
        map<string,  vector<string> >::iterator itNames;
        if(namefile != "") {
            m->mothurOut("Reading " + namefile + "..."); cout.flush();
            nameMap.clear(); //remove old names
            util.readNames(namefile, nameMap);
            m->mothurOut("  Done.\n");
        }
        
        //output taxonomy with the unclassified bins added
        ifstream inTax;
        util.openInputFile(newTaxonomyFile, inTax);
        
        ofstream outTax;
        string unclass = newTaxonomyFile + ".unclass.temp";
        util.openOutputFile(unclass, outTax);
        
        //get maxLevel from phylotree so you know how many 'unclassified's to add
        int maxLevel = classify->getMaxLevel();
        
        //read taxfile - this reading and rewriting is done to preserve the confidence scores.
        string name, taxon;
        GroupMap* groupMap = NULL;
        CountTable* ct = NULL;
        PhyloSummary* taxaSum;
        
        if (hasCount) {
            ct = new CountTable();
            ct->readTable(countfile, true, false);
            taxaSum = new PhyloSummary(ct, relabund, printlevel);
        }else {
            if (groupfile != "") {  groupMap = new GroupMap(groupfile); groupMap->readMap(); }
            taxaSum = new PhyloSummary(groupMap, relabund, printlevel);
        }
        
        while (!inTax.eof()) {
            if (m->getControl_pressed()) { outputTypes.clear(); if (ct != NULL) { delete ct; }  if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);	} delete classify; return 0; }
            
            inTax >> name; util.gobble(inTax);
            taxon = util.getline(inTax); util.gobble(inTax);
            
            string newTax = util.addUnclassifieds(taxon, maxLevel, probs);
            
            outTax << name << '\t' << newTax << endl;
            
            if (namefile != "") {
                itNames = nameMap.find(name);
                
                if (itNames == nameMap.end()) {
                    m->mothurOut(name + " is not in your name file please correct.\n");  exit(1);
                }else{
                    //add it as many times as there are identical seqs
                    for (int i = 0; i < itNames->second.size(); i++) { taxaSum->addSeqToTree(itNames->second[i], newTax); }
                    itNames->second.clear();
                    nameMap.erase(itNames->first);
                }
            }else { taxaSum->addSeqToTree(name, newTax); }
        }
        inTax.close();
        outTax.close();
        
        util.mothurRemove(newTaxonomyFile);
        util.renameFile(unclass, newTaxonomyFile);
        
        if (m->getControl_pressed()) {  outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) {	util.mothurRemove(outputNames[i]);	} delete classify; return 0; }
        
        //print summary file
        ofstream outTaxTree;
        util.openOutputFile(taxSummary, outTaxTree);
        taxaSum->print(outTaxTree, output);
        outTaxTree.close();
        
        if (ct != NULL) { delete ct; }
        if (groupMap != NULL) { delete groupMap; } delete taxaSum;
        util.mothurRemove(tempTaxonomyFile);
        delete classify;
        
        m->mothurOut("\nIt took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numFastaSeqs) + " sequences.\n\n");

        m->mothurOut("\nOutput File Names: \n");
        for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
        m->mothurOutEndLine();
		
		//set taxonomy file as new current taxonomyfile
		string currentName = "";
		itTypes = outputTypes.find("taxonomy");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setTaxonomyFile(currentName); } }
		
		currentName = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { currentName = (itTypes->second)[0]; current->setAccnosFile(currentName); } }

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ClassifySeqsCommand", "execute");
		exit(1);
	}
}
コード例 #16
0
int ChimeraPintailCommand::execute(){
	try{
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
		for (int s = 0; s < fastaFileNames.size(); s++) {
				
			m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();

			int start = time(NULL);	
			
			//set user options
			if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
			
			//check for quantile to save the time
			string baseName = templatefile;
			if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
			
			string tempQuan = "";
			if ((!filter) && (maskfile == "")) {
				tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
			}else if ((!filter) && (maskfile != "")) { 
				tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
			}else if ((filter) && (maskfile != "")) { 
				tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
			}else if ((filter) && (maskfile == "")) { 
				tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
			}
			
			ifstream FileTest(tempQuan.c_str());
			if(FileTest){	
				bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
				if (GoodFile) {  
					m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();	
				}
			}else {
				string tryPath = m->getDefaultPath();
				string tempQuan = "";
				if ((!filter) && (maskfile == "")) {
					tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
				}else if ((!filter) && (maskfile != "")) { 
					tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
				}else if ((filter) && (maskfile != "")) { 
					tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
				}else if ((filter) && (maskfile == "")) { 
					tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
				}
				
				ifstream FileTest2(tempQuan.c_str());
				if(FileTest2){	
					bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
					if (GoodFile) {  
						m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();	
					}
				}
			}
			chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
			
			if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
			string outputFileName, accnosFileName;
            map<string, string> variables;
            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
			if (maskfile != "") { variables["[tag]"] = m->getSimpleName(m->getRootName(maskfile)); }
            outputFileName = getOutputFileName("chimera", variables);
            accnosFileName = getOutputFileName("accnos", variables);
			
			
			if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  return 0;	}
			
			if (chimera->getUnaligned()) { 
				m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
				delete chimera;
				return 0; 
			}
			templateSeqsLength = chimera->getLength();
		
		#ifdef USE_MPI
			int pid, numSeqsPerProcessor; 
				int tag = 2001;
				vector<unsigned long long> MPIPos;
				
				MPI_Status status; 
				MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
				MPI_Comm_size(MPI_COMM_WORLD, &processors); 

				MPI_File inMPI;
				MPI_File outMPI;
				MPI_File outMPIAccnos;
				
				int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
				int inMode=MPI_MODE_RDONLY; 
				
				char outFilename[1024];
				strcpy(outFilename, outputFileName.c_str());
				
				char outAccnosFilename[1024];
				strcpy(outAccnosFilename, accnosFileName.c_str());
				
				char inFileName[1024];
				strcpy(inFileName, fastaFileNames[s].c_str());

				MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
				MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
				MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
				
				if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  delete chimera; return 0;  }

				if (pid == 0) { //you are the root process 
								
					MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
					
					//send file positions to all processes
					for(int i = 1; i < processors; i++) { 
						MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
						MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
					}
					
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
				
					//do your part
					driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
					
					if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  delete chimera; return 0;  }
					
				}else{ //you are a child process
					MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
					MPIPos.resize(numSeqs+1);
					MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
					
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; 	}
					
					//do your part
					driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
					
					if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	}  delete chimera; return 0;  }
				}
				
				//close files 
				MPI_File_close(&inMPI);
				MPI_File_close(&outMPI);
				MPI_File_close(&outMPIAccnos);
				MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
		#else
						
			//break up file
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
			
				for (int i = 0; i < (positions.size()-1); i++) {
					lines.push_back(new linePair(positions[i], positions[(i+1)]));
				}	
			
				if(processors == 1){
		
					numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
					
					if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
					
				}else{
					processIDS.resize(0);
					
					numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
				
					rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
					rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
						
					//append output files
					for(int i=1;i<processors;i++){
						m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
						m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
					}
					
					//append output files
					for(int i=1;i<processors;i++){
						m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
						m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
					}
										
					if (m->control_pressed) { 
						m->mothurRemove(outputFileName); 
						m->mothurRemove(accnosFileName);
						for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} outputTypes.clear();
						for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
						delete chimera;
						return 0;
					}
				}

			#else
				lines.push_back(new linePair(0, 1000));
				numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
				
				if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {	m->mothurRemove(outputNames[j]);	} for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
			#endif
			
		#endif	
		
			delete chimera;
			for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
			
			outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
			outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
			
			m->mothurOutEndLine();
			m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences.");	m->mothurOutEndLine();
		}
		
		//set accnos file as new current accnosfile
		string current = "";
		itTypes = outputTypes.find("accnos");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}	
		m->mothurOutEndLine();
			
		return 0;
		
	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraPintailCommand", "execute");
		exit(1);
	}
}
コード例 #17
0
ファイル: aligncommand.cpp プロジェクト: azerxu/mothur
int AlignCommand::execute(){
	try {
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}

		templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, rand());
		
		for (int s = 0; s < candidateFileNames.size(); s++) {
			if (m->control_pressed) { outputTypes.clear(); return 0; }
			
			m->mothurOut("Aligning sequences from " + candidateFileNames[s] + " ..." ); m->mothurOutEndLine();
			
			if (outputDir == "") {  outputDir += m->hasPath(candidateFileNames[s]); }
			string alignFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align";
			string reportFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "align.report";
			string accnosFileName = outputDir + m->getRootName(m->getSimpleName(candidateFileNames[s])) + "flip.accnos";
			bool hasAccnos = true;
			
			int numFastaSeqs = 0;
			for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
			int start = time(NULL);
		
#ifdef USE_MPI	
				int pid, numSeqsPerProcessor; 
				int tag = 2001;
				vector<unsigned long long> MPIPos;
				MPIWroteAccnos = false;
			
				MPI_Status status; 
				MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
				MPI_Comm_size(MPI_COMM_WORLD, &processors); 

				MPI_File inMPI;
				MPI_File outMPIAlign;
				MPI_File outMPIReport;
				MPI_File outMPIAccnos;
				
				int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
				int inMode=MPI_MODE_RDONLY; 
				
				char outAlignFilename[1024];
				strcpy(outAlignFilename, alignFileName.c_str());
				
				char outReportFilename[1024];
				strcpy(outReportFilename, reportFileName.c_str());
				
				char outAccnosFilename[1024];
				strcpy(outAccnosFilename, accnosFileName.c_str());
				
				char inFileName[1024];
				strcpy(inFileName, candidateFileNames[s].c_str());
				
				MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
				MPI_File_open(MPI_COMM_WORLD, outAlignFilename, outMode, MPI_INFO_NULL, &outMPIAlign);
				MPI_File_open(MPI_COMM_WORLD, outReportFilename, outMode, MPI_INFO_NULL, &outMPIReport);
				MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
				
				if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }
				
				if (pid == 0) { //you are the root process 
					
					MPIPos = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); //fills MPIPos, returns numSeqs
					
					//send file positions to all processes
					for(int i = 1; i < processors; i++) { 
						MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
						MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
					}
					
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numFastaSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; 	}
					
					//align your part
					driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
					
					if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }

					for (int i = 1; i < processors; i++) {
						bool tempResult;
						MPI_Recv(&tempResult, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
						if (tempResult != 0) { MPIWroteAccnos = true; }
					}
				}else{ //you are a child process
					MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
					MPIPos.resize(numFastaSeqs+1);
					MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);

					
					//figure out how many sequences you have to align
					numSeqsPerProcessor = numFastaSeqs / processors;
					int startIndex =  pid * numSeqsPerProcessor;
					if(pid == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; 	}
					
					
					//align your part
					driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
					
					if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIAlign);  MPI_File_close(&outMPIReport);  MPI_File_close(&outMPIAccnos); outputTypes.clear(); return 0; }

					MPI_Send(&MPIWroteAccnos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
				}
				
				//close files 
				MPI_File_close(&inMPI);
				MPI_File_close(&outMPIAlign);
				MPI_File_close(&outMPIReport);
				MPI_File_close(&outMPIAccnos);
				
				//delete accnos file if blank
				if (pid == 0) {
					//delete accnos file if its blank else report to user
					if (MPIWroteAccnos) { 
						m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
						if (!flip) {
							m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
						}else{  m->mothurOut(" If the reverse compliment proved to be better it was reported.");  }
						m->mothurOutEndLine();
					}else { 
						//MPI_Info info;
						//MPI_File_delete(outAccnosFilename, info);
						hasAccnos = false;	
						m->mothurRemove(accnosFileName); 
					}
				}
				
#else

			vector<unsigned long long> positions; 
		#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
			positions = m->divideFile(candidateFileNames[s], processors);
			for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(new linePair(positions[i], positions[(i+1)]));	}
		#else
			if (processors == 1) {
				lines.push_back(new linePair(0, 1000));
			}else {
				positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); 
				
				//figure out how many sequences you have to process
				int numSeqsPerProcessor = numFastaSeqs / processors;
				for (int i = 0; i < processors; i++) {
					int startIndex =  i * numSeqsPerProcessor;
					if(i == (processors - 1)){	numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; 	}
					lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
				}
			}
		#endif
			
			if(processors == 1){
				numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
			}else{
				numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); 
			}
				
			if (m->control_pressed) { m->mothurRemove(accnosFileName); m->mothurRemove(alignFileName); m->mothurRemove(reportFileName); outputTypes.clear();  return 0; }
			
			//delete accnos file if its blank else report to user
			if (m->isBlank(accnosFileName)) {  m->mothurRemove(accnosFileName);  hasAccnos = false; }
			else { 
				m->mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
				if (!flip) {
					m->mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
				}else{  m->mothurOut(" If the reverse compliment proved to be better it was reported.");  }
				m->mothurOutEndLine();
			}

#endif		


		#ifdef USE_MPI
			MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
					
			if (pid == 0) { //only one process should output to screen
		#endif

			outputNames.push_back(alignFileName); outputTypes["fasta"].push_back(alignFileName);
			outputNames.push_back(reportFileName); outputTypes["alignreport"].push_back(reportFileName);
			if (hasAccnos)	{	outputNames.push_back(accnosFileName);	outputTypes["accnos"].push_back(accnosFileName);  }
			
		#ifdef USE_MPI
			}
		#endif

			m->mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
			m->mothurOutEndLine();
			m->mothurOutEndLine();
		}
		
		//set align file as new current fastafile
		string currentFasta = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { currentFasta = (itTypes->second)[0]; m->setFastaFile(currentFasta); }
		}
		
		m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();

		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "AlignCommand", "execute");
		exit(1);
	}
}
コード例 #18
0
ファイル: sparcccommand.cpp プロジェクト: Cryomics-Lab/mothur
//**********************************************************************************************************************
int SparccCommand::process(vector<SharedRAbundVector*>& lookup){
	try {
        cout.setf(ios::fixed, ios::floatfield);
        cout.setf(ios::showpoint);
        
        vector<vector<float> > sharedVector;
        vector<string> otuNames = m->currentSharedBinLabels;
        
        //fill sharedVector to pass to CalcSparcc
        for (int i = 0; i < lookup.size(); i++) {
            vector<int> abunds = lookup[i]->getAbundances();
            vector<float> temp;
            for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); }
            sharedVector.push_back(temp);
        }
        int numOTUs = (int)sharedVector[0].size();
        int numGroups = lookup.size();
        
        map<string, string> variables;
        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
        variables["[distance]"] = lookup[0]->getLabel();
        

        string relAbundFileName = getOutputFileName("sparccrelabund", variables);
        ofstream relAbundFile;
        m->openOutputFile(relAbundFileName, relAbundFile);
        outputNames.push_back(relAbundFileName);  outputTypes["sparccrelabund"].push_back(relAbundFileName);
        
        relAbundFile << "OTU\taveRelAbund\n";
        for(int i=0;i<numOTUs;i++){
            if (m->control_pressed) { relAbundFile.close(); return 0; }
            
            double relAbund = 0.0000;
            for(int j=0;j<numGroups;j++){
                relAbund += sharedVector[j][i]/(double)lookup[j]->getNumSeqs();
            }
            relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl;
        }
        relAbundFile.close();
        
        CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod);
        vector<vector<float> > origCorrMatrix = originalData.getRho();
        
        string correlationFileName = getOutputFileName("corr", variables);
        ofstream correlationFile;
        m->openOutputFile(correlationFileName, correlationFile);
        outputNames.push_back(correlationFileName);  outputTypes["corr"].push_back(correlationFileName);
        correlationFile.setf(ios::fixed, ios::floatfield);
        correlationFile.setf(ios::showpoint);
        
        for(int i=0;i<numOTUs;i++){ correlationFile << '\t' << otuNames[i];    }   correlationFile << endl;
        for(int i=0;i<numOTUs;i++){
            correlationFile << otuNames[i];
            for(int j=0;j<numOTUs;j++){
                correlationFile << '\t' << origCorrMatrix[i][j];
            }
            correlationFile << endl;
        }
        
        
        if(numPermutations != 0){
            vector<vector<float> > pValues = createProcesses(sharedVector, origCorrMatrix);
            
            if (m->control_pressed) { return 0; }
            
            string pValueFileName = getOutputFileName("pvalue", variables);
            ofstream pValueFile;
            m->openOutputFile(pValueFileName, pValueFile);
            outputNames.push_back(pValueFileName);  outputTypes["pvalue"].push_back(pValueFileName);
            pValueFile.setf(ios::fixed, ios::floatfield);
            pValueFile.setf(ios::showpoint);
            
            for(int i=0;i<numOTUs;i++){ pValueFile << '\t' << otuNames[i];    }   pValueFile << endl;
            for(int i=0;i<numOTUs;i++){
                pValueFile << otuNames[i];
                for(int j=0;j<numOTUs;j++){
                    pValueFile << '\t' << pValues[i][j];
                }
                pValueFile << endl;
            }
        }


        return 0;
    }
	catch(exception& e) {
		m->errorOut(e, "SparccCommand", "process");
		exit(1);
	}
}
コード例 #19
0
int ChopSeqsCommand::execute(){
	try {
		
		if (abort == true) { if (calledHelp) { return 0; }  return 2;	}
		
        map<string, string> variables;
        string thisOutputDir = outputDir;
		if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
        variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
        string outputFileName = getOutputFileName("fasta", variables);
        outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
        string outputFileNameAccnos = getOutputFileName("accnos", variables);
        
        string fastafileTemp = "";
        if (qualfile != "") {  fastafileTemp = outputFileName + ".qualFile.Positions.temp"; }
        
        vector<unsigned long long> positions; 
        vector<linePair> lines;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
        positions = m->divideFile(fastafile, processors);
        for (int i = 0; i < (positions.size()-1); i++) {	lines.push_back(linePair(positions[i], positions[(i+1)]));	}
#else
        int numSeqs = 0;
        positions = m->setFilePosFasta(fastafile, numSeqs); 
        if (numSeqs < processors) { processors = numSeqs; }
		
        //figure out how many sequences you have to process
        int numSeqsPerProcessor = numSeqs / processors;
        for (int i = 0; i < processors; i++) {
            int startIndex =  i * numSeqsPerProcessor;
            if(i == (processors - 1)){	numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; 	}
            lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
        }
#endif
        
        bool wroteAccnos = false;
        if(processors == 1) {   wroteAccnos = driver(lines[0], fastafile, outputFileName, outputFileNameAccnos, fastafileTemp);        }
        else                {   wroteAccnos = createProcesses(lines, fastafile, outputFileName, outputFileNameAccnos, fastafileTemp);  }
        
        if (m->control_pressed) {  return 0; }
        
        if (qualfile != "") {
            thisOutputDir = outputDir;
            if (outputDir == "") {  thisOutputDir += m->hasPath(qualfile);  }
            variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(qualfile));
            string outputQualFileName = getOutputFileName("qfile", variables);
            outputNames.push_back(outputQualFileName); outputTypes["qfile"].push_back(outputQualFileName);
            
            processQual(outputQualFileName, fastafileTemp);
            m->mothurRemove(fastafileTemp);
        }
		
        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
        
        if (wroteAccnos) {
            outputNames.push_back(outputFileNameAccnos); outputTypes["accnos"].push_back(outputFileNameAccnos);
            
             //use remove.seqs to create new name, group and count file
            if ((countfile != "") || (namefile != "") || (groupfile != "")) {
                string inputString = "accnos=" + outputFileNameAccnos;
                
                if (countfile != "") {  inputString += ", count=" + countfile;  }
                else{
                    if (namefile != "") {  inputString += ", name=" + namefile;  }
                    if (groupfile != "") {  inputString += ", group=" + groupfile;  }
                }
                
                m->mothurOut("/******************************************/"); m->mothurOutEndLine();
                m->mothurOut("Running command: remove.seqs(" + inputString + ")"); m->mothurOutEndLine();
                m->mothurCalling = true;
                
                Command* removeCommand = new RemoveSeqsCommand(inputString);
                removeCommand->execute();
                
                map<string, vector<string> > filenames = removeCommand->getOutputFiles();
                
                delete removeCommand;
                m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine();
                
                if (groupfile != "") {
                    thisOutputDir = outputDir;
                    if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
                    variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
                    string outGroup = getOutputFileName("group", variables);
                    m->renameFile(filenames["group"][0], outGroup);
                    outputNames.push_back(outGroup); outputTypes["group"].push_back(outGroup);
                }
                
                if (namefile != "") {
                    thisOutputDir = outputDir;
                    if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
                    variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
                    string outName = getOutputFileName("name", variables);
                    m->renameFile(filenames["name"][0], outName);
                    outputNames.push_back(outName); outputTypes["name"].push_back(outName);
                }
                
                if (countfile != "") {
                    thisOutputDir = outputDir;
                    if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
                    variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
                    string outCount = getOutputFileName("count", variables);
                    m->renameFile(filenames["count"][0], outCount);
                    outputNames.push_back(outCount); outputTypes["count"].push_back(outCount);
                }
            }
        }
		else {  m->mothurRemove(outputFileNameAccnos);  }
		
        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
        
		//set fasta file as new current fastafile
		string current = "";
		itTypes = outputTypes.find("fasta");
		if (itTypes != outputTypes.end()) {
			if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
		}
        
		if (wroteAccnos) { //set accnos file as new current accnosfile
			itTypes = outputTypes.find("accnos");
			if (itTypes != outputTypes.end()) {
				if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
			}
            
            itTypes = outputTypes.find("name");
            if (itTypes != outputTypes.end()) {
                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
            }
            
            itTypes = outputTypes.find("group");
            if (itTypes != outputTypes.end()) {
                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
            }
            
            itTypes = outputTypes.find("count");
            if (itTypes != outputTypes.end()) {
                if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
            }
		}
        
        
		
        m->mothurOutEndLine();
		m->mothurOut("Output File Names: "); m->mothurOutEndLine();
		for (int i = 0; i < outputNames.size(); i++) {	m->mothurOut(outputNames[i]); m->mothurOutEndLine();	}
		m->mothurOutEndLine();
		
		return 0;		
	}

	catch(exception& e) {
		m->errorOut(e, "ChopSeqsCommand", "execute");
		exit(1);
	}
}