Exemple #1
0
//**********************************************************************************************************************
// Aligns the sequences found in one slice of a fasta file.
//
// filePos     - start and end offsets of the slice of `filename` handled by this call
// alignFName  - receives the aligned sequences in fasta format
// reportFName - handed to NastReport, which writes the per-sequence report
// accnosFName - receives the names of sequences that fell below the threshold, plus a note on
//               whether the reverse complement was used
// filename    - fasta file holding the candidate sequences
//
// Returns the number of named sequences that were processed.
int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){
	try {
		ofstream alignOut;
		m->openOutputFile(alignFName, alignOut);
		
		ofstream accnosOut;
		m->openOutputFile(accnosFName, accnosOut);
		
		NastReport report(reportFName);
		
		// jump straight to this call's slice of the input
		ifstream inFASTA;
		m->openInputFile(filename, inFASTA);
		inFASTA.seekg(filePos->start);

		int numProcessed = 0;
		
		// the aligner is built inside the driver to avoid deep copies in the windows parallelized version
		const int longestBase = templateDB->getLongestBase();
		if (m->debug) { m->mothurOut("[DEBUG]: template longest base = "  + toString(templateDB->getLongestBase()) + " \n"); }
		
		Alignment* aligner;
		if (align == "gotoh") {
			aligner = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);
		}
		else if (align == "needleman") {
			aligner = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
		}
		else if (align == "blast") {
			aligner = new BlastAlignment(gapOpen, gapExtend, match, misMatch);
		}
		else if (align == "noalign") {
			aligner = new NoAlign();
		}
		else {
			// unknown method name: tell the user and fall back to needleman
			m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
			m->mothurOutEndLine();
			aligner = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
		}
	
		// the loop is left only through the break statements below
		for (;;) {
			
			if (m->control_pressed) {  break; }
			
			Sequence* query = new Sequence(inFASTA);  m->gobble(inFASTA);
			report.setCandidate(query);

			// remember the state of the read before any alignment touches it
			int basesBefore = query->getNumBases();
			string originalUnaligned = query->getUnaligned();
			int minBasesNeeded = basesBefore * threshold;
	
			if (query->getName() != "") { //a nameless record can show up when the file ends in a commented sequence
				
				// grow the alignment matrix when this read does not fit
				if (query->getUnaligned().length()+1 > aligner->getnRows()) {
					if (m->debug) { m->mothurOut("[DEBUG]: " + query->getName() + " " + toString(query->getUnaligned().length()) + " " + toString(aligner->getnRows()) + " \n"); }
					aligner->resize(query->getUnaligned().length()+2);
				}
				
				Sequence closest = templateDB->findClosestSequence(query);
				Sequence* templateSeq = new Sequence(closest.getName(), closest.getAligned());
				
				float searchScore = templateDB->getSearchScore();
								
				Nast* nast = new Nast(aligner, query, templateSeq);
		
				// Nast keeps the pointers it is given rather than copying the sequences, and the report
				// below still needs them. The flipped sequence therefore has to outlive the report when it
				// is the one that was kept; ownsFlipped records whether it must be freed afterwards.
				Sequence* flipped;
				Nast* flippedNast;
				bool ownsFlipped = false;
												
				// too few bases survived the alignment: the read may be in the wrong orientation
				if (query->getNumBases() < minBasesNeeded) {
					
					string flipNote =  "";
					
					if (flip) { //the user asked for the reverse complement to be tried
				
						flipped = new Sequence(query->getName(), originalUnaligned);
						flipped->reverseComplement();
						
						if (m->debug) { m->mothurOut("[DEBUG]: flipping "  + query->getName() + " \n"); }
						
						// search and align again with the flipped read
						Sequence closestFlipped = templateDB->findClosestSequence(flipped);
						Sequence* flippedTemplate = new Sequence(closestFlipped.getName(), closestFlipped.getAligned());
						
						if (m->debug) { m->mothurOut("[DEBUG]: closest template "  + closestFlipped.getName() + " \n"); }
						
						searchScore = templateDB->getSearchScore();
						
						flippedNast = new Nast(aligner, flipped, flippedTemplate);
						
						if (m->debug) { m->mothurOut("[DEBUG]: completed Nast2 "  + query->getName() + " flipped numBases = " + toString(flipped->getNumBases()) + " old numbases = " + toString(query->getNumBases()) +" \n"); }
			
						const bool flippedIsBetter = flipped->getNumBases() > query->getNumBases();
						
						if (!flippedIsBetter) {
							// keep the forward alignment and discard everything built for the flip
							flipNote = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
							delete flippedNast;
							delete flippedTemplate;
							delete flipped;
						}
						else {
							// adopt the flipped alignment, template and nast in place of the forward ones
							query->setAligned(flipped->getAligned());
							delete templateSeq;
							templateSeq = flippedTemplate;
							delete nast;
							nast = flippedNast;
							ownsFlipped = true;
							flipNote = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
						}
						
						if (m->debug) { m->mothurOut("[DEBUG]: done.\n"); }
					}
					
					// every read below the threshold is listed in the accnos file
					accnosOut << query->getName() << flipNote << endl;
				}
				
				report.setTemplate(templateSeq);
				report.setSearchParameters(search, searchScore);
				report.setAlignmentParameters(align, aligner);
				report.setNastParameters(*nast);
	
				alignOut << '>' << query->getName() << '\n' << query->getAligned() << endl;
				
				report.print();
				
				// the report is written, so the helpers can go
				delete nast;
				delete templateSeq;
				if (ownsFlipped) {   delete flipped;   }
				
				numProcessed++;
			}
			delete query;
			
			// stop at the end of this slice (or at end of file where offsets are not used)
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				unsigned long long pos = inFASTA.tellg();
				if ((pos == -1) || (pos >= filePos->end)) { break; }
			#else
				if (inFASTA.eof()) { break; }
			#endif
			
			// progress, every 100 sequences
			if((numProcessed) % 100 == 0){	m->mothurOutJustToScreen(toString(numProcessed) + "\n"); 		}
			
		}
		// final progress line, unless the loop just printed the same number
		if((numProcessed) % 100 != 0){	m->mothurOutJustToScreen(toString(numProcessed) + "\n"); 		}
		
		delete aligner;
		alignOut.close();
		inFASTA.close();
		accnosOut.close();
		
		return numProcessed;
	}
	catch(exception& e) {
		m->errorOut(e, "AlignCommand", "driver");
		exit(1);
	}
}
Exemple #2
0
/********************************************************************/
// Destructor. The body is empty: nothing is released explicitly here.
TrimOligos::~TrimOligos() {}
//*******************************************************************/
// Looks for one of the configured barcodes at the start of a read and strips it off.
//
// seq   - read to examine; when a barcode is accepted, its unaligned string is replaced by the
//         part that follows the barcode
// qual  - quality scores belonging to the read; when qual has a name, trimQScores() is called with
//         the number of stripped positions (presumably dropping the leading scores - confirm
//         against QualityScores::trimQScores)
// group - set to the value stored with the accepted barcode; left untouched otherwise
//
// Returns 0 for an exact match and the number of differences (at most bdiffs) for an accepted
// inexact match. Any value greater than bdiffs means failure:
//   bdiffs + 1    no exact match and bdiffs is 0
//   bdiffs + 10   the read is shorter than a barcode
//   bdiffs + 100  several barcodes are equally close
//   minDiff       the closest barcode needs more than bdiffs differences
//   1000000       there are no barcodes to compare against (and bdiffs is not 0)
int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
	try {
		
		string rawSequence = seq.getUnaligned();
		int success = bdiffs + 1;	//guilty until proven innocent
		
		//first pass: can you find the barcode exactly
		for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
			string oligo = it->first;
			if(rawSequence.length() < oligo.length()){	//let's just assume that the barcodes are the same length
				success = bdiffs + 10;					//if the sequence is shorter than the barcode then bail out
				break;	
			}
			
			if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
				group = it->second;
				seq.setUnaligned(rawSequence.substr(oligo.length()));
				
				if(qual.getName() != ""){
					qual.trimQScores(oligo.length(), -1);
				}
				
				success = 0;
				break;
			}
		}
		
		//if you found the barcode or if you don't want to allow for diffs
		if ((bdiffs == 0) || (success == 0)) { return success;  }
		
		//second pass: try aligning and see if you can find it
		int maxLength = 0;
		for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
			int oligoLength = it->first.length();
			if(oligoLength > maxLength){ maxLength = oligoLength; }
		}
		
		//A read shorter than the longest barcode is rejected. This test does not depend on the barcode
		//being aligned, so it is made once, before the aligner is allocated. Returning here keeps the
		//"too short" code from being replaced by the 1000000 sentinel further down.
		if(rawSequence.length() < static_cast<string::size_type>(maxLength)){ return bdiffs + 10; }
		
		//without barcodes the loop below never runs, so no aligner is needed
		Alignment* alignment = NULL;
		if (barcodes.size() > 0) {
			alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
		}
		
		int minDiff = 1000000;	//sentinel: larger than any real number of differences
		int minCount = 1;		//how many barcodes share the smallest number of differences
		int minGroup = -1;
		int minPos = 0;			//number of read positions covered by the best barcode
		
		for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
			string oligo = it->first;
			
			//use needleman to align first barcode.length()+bdiffs of sequence to each barcode
			alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
			oligo = alignment->getSeqAAln();
			string temp = alignment->getSeqBAln();
			
			//cut both aligned strings after the last non-gap position of the barcode
			int alnLength = oligo.length();
			
			for(int i=oligo.length()-1;i>=0;i--){
				if(oligo[i] != '-'){	alnLength = i+1;	break;	}
			}
			oligo = oligo.substr(0,alnLength);
			temp = temp.substr(0,alnLength);
			
			int numDiff = countDiffs(oligo, temp);
			
			if(numDiff < minDiff){
				minDiff = numDiff;
				minCount = 1;
				minGroup = it->second;
				
				//count the read positions (non-gap characters) that the barcode spans
				minPos = 0;
				for(int i=0;i<alnLength;i++){
					if(temp[i] != '-'){
						minPos++;
					}
				}
			}
			else if(numDiff == minDiff){
				minCount++;
			}
			
		}
		
		if(minDiff > bdiffs)	{	success = minDiff;		}	//no good matches
		else if(minCount > 1)	{	success = bdiffs + 100;	}	//can't tell the difference between multiple barcodes
		else{													//use the best match
			group = minGroup;
			seq.setUnaligned(rawSequence.substr(minPos));
			
			if(qual.getName() != ""){
				qual.trimQScores(minPos, -1);
			}
			success = minDiff;
		}
		
		delete alignment;	//deleting NULL is a no-op
		
		return success;
		
	}
	catch(exception& e) {
		m->errorOut(e, "TrimOligos", "stripBarcode");
		exit(1);
	}
	
}
//*******************************************************************/
// Looks for one of the configured barcodes at the start of a read and strips it off.
// This overload does not handle quality scores.
//
// seq   - read to examine; when a barcode is accepted, its unaligned string is replaced by the
//         part that follows the barcode
// group - set to the value stored with the accepted barcode; left untouched otherwise
//
// Returns 0 for an exact match and the number of differences (at most bdiffs) for an accepted
// inexact match. Any value greater than bdiffs means failure:
//   bdiffs + 1    no exact match and bdiffs is 0
//   bdiffs + 10   the read is shorter than a barcode
//   bdiffs + 100  several barcodes are equally close
//   minDiff       the closest barcode needs more than bdiffs differences
//   1000000       there are no barcodes to compare against (and bdiffs is not 0)
int TrimOligos::stripBarcode(Sequence& seq, int& group){
	try {
		
		string rawSequence = seq.getUnaligned();
		int success = bdiffs + 1;	//guilty until proven innocent
		
		//first pass: can you find the barcode exactly
		for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
			string oligo = it->first;
			if(rawSequence.length() < oligo.length()){	//let's just assume that the barcodes are the same length
				success = bdiffs + 10;					//if the sequence is shorter than the barcode then bail out
				break;	
			}
			
			if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
				group = it->second;
				seq.setUnaligned(rawSequence.substr(oligo.length()));
				
				success = 0;
				break;
			}
		}
		
		//if you found the barcode or if you don't want to allow for diffs
		if ((bdiffs == 0) || (success == 0)) { return success;  }
		
		//second pass: try aligning and see if you can find it
		int maxLength = 0;
		for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
			int oligoLength = it->first.length();
			if(oligoLength > maxLength){ maxLength = oligoLength; }
		}
		
		//A read shorter than the longest barcode is rejected. This test does not depend on the barcode
		//being aligned, so it is made once, before the aligner is allocated. Returning here keeps the
		//"too short" code from being replaced by the 1000000 sentinel further down.
		if(rawSequence.length() < static_cast<string::size_type>(maxLength)){ return bdiffs + 10; }
		
		//without barcodes the loop below never runs, so no aligner is needed
		Alignment* alignment = NULL;
		if (barcodes.size() > 0) {
			alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));
		}
		
		int minDiff = 1000000;	//sentinel: larger than any real number of differences
		int minCount = 1;		//how many barcodes share the smallest number of differences
		int minGroup = -1;
		int minPos = 0;			//number of read positions covered by the best barcode
		
		for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
			string oligo = it->first;
			
			//use needleman to align first barcode.length()+bdiffs of sequence to each barcode
			alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
			oligo = alignment->getSeqAAln();
			string temp = alignment->getSeqBAln();
			
			//cut both aligned strings after the last non-gap position of the barcode
			int alnLength = oligo.length();
			
			for(int i=oligo.length()-1;i>=0;i--){
				if(oligo[i] != '-'){	alnLength = i+1;	break;	}
			}
			oligo = oligo.substr(0,alnLength);
			temp = temp.substr(0,alnLength);
			
			int numDiff = countDiffs(oligo, temp);
			
			if(numDiff < minDiff){
				minDiff = numDiff;
				minCount = 1;
				minGroup = it->second;
				
				//count the read positions (non-gap characters) that the barcode spans
				minPos = 0;
				for(int i=0;i<alnLength;i++){
					if(temp[i] != '-'){
						minPos++;
					}
				}
			}
			else if(numDiff == minDiff){
				minCount++;
			}
			
		}
		
		if(minDiff > bdiffs)	{	success = minDiff;		}	//no good matches
		else if(minCount > 1)	{	success = bdiffs + 100;	}	//can't tell the difference between multiple barcodes
		else{													//use the best match
			group = minGroup;
			seq.setUnaligned(rawSequence.substr(minPos));
			success = minDiff;
		}
		
		delete alignment;	//deleting NULL is a no-op
		
		return success;
		
	}
	catch(exception& e) {
		m->errorOut(e, "TrimOligos", "stripBarcode");
		exit(1);
	}
	
}
//********************************************************************/
// Looks for one of the configured forward primers at the start of a read and strips it off.
// This overload does not handle quality scores.
//
// seq   - read to examine; when a primer is accepted, its unaligned string is replaced by the
//         part that follows the primer
// group - set to the value stored with the accepted primer; left untouched otherwise
//
// Returns 0 for an exact match and the number of differences (at most pdiffs) for an accepted
// inexact match. Any value greater than pdiffs means failure:
//   pdiffs + 1    no exact match and pdiffs is 0
//   pdiffs + 10   pdiffs is 0 and the read is shorter than a primer, or several primers are
//                 equally close
//   pdiffs + 100  pdiffs is not 0 and the read is shorter than the longest primer
//   minDiff       the closest primer needs more than pdiffs differences
//   1000000       there are no primers to compare against (and pdiffs is not 0)
// NOTE(review): stripBarcode uses +10 for "too short" and +100 for "ambiguous" in its alignment
// pass; here the two codes are the other way round. Kept as written - confirm which is intended.
int TrimOligos::stripForward(Sequence& seq, int& group){
	try {
		
		string rawSequence = seq.getUnaligned();
		int success = pdiffs + 1;	//guilty until proven innocent
		
		//first pass: can you find the primer exactly
		for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
			string oligo = it->first;
			if(rawSequence.length() < oligo.length()){	//let's just assume that the primers are the same length
				success = pdiffs + 10;					//if the sequence is shorter than the primer then bail out
				break;	
			}
			
			if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
				group = it->second;
				seq.setUnaligned(rawSequence.substr(oligo.length()));
				success = 0;
				break;
			}
		}
		
		//if you found the primer or if you don't want to allow for diffs
		if ((pdiffs == 0) || (success == 0)) {	return success;  }
		
		//second pass: try aligning and see if you can find it
		int maxLength = 0;
		for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
			int oligoLength = it->first.length();
			if(oligoLength > maxLength){ maxLength = oligoLength; }
		}
		
		//A read shorter than the longest primer is rejected. This test does not depend on the primer
		//being aligned, so it is made once, before the aligner is allocated. Returning here keeps the
		//"too short" code from being replaced by the 1000000 sentinel further down.
		if(rawSequence.length() < static_cast<string::size_type>(maxLength)){ return pdiffs + 100; }
		
		//without primers the loop below never runs, so no aligner is needed
		Alignment* alignment = NULL;
		if (primers.size() > 0) {
			alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
		}
		
		int minDiff = 1000000;	//sentinel: larger than any real number of differences
		int minCount = 1;		//how many primers share the smallest number of differences
		int minGroup = -1;
		int minPos = 0;			//number of read positions covered by the best primer
		
		for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
			string oligo = it->first;
			
			//use needleman to align first primer.length()+pdiffs of sequence to each primer
			alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
			oligo = alignment->getSeqAAln();
			string temp = alignment->getSeqBAln();
			
			//cut both aligned strings after the last non-gap position of the primer
			int alnLength = oligo.length();
			
			for(int i=oligo.length()-1;i>=0;i--){
				if(oligo[i] != '-'){	alnLength = i+1;	break;	}
			}
			oligo = oligo.substr(0,alnLength);
			temp = temp.substr(0,alnLength);
			
			int numDiff = countDiffs(oligo, temp);
			
			if(numDiff < minDiff){
				minDiff = numDiff;
				minCount = 1;
				minGroup = it->second;
				
				//count the read positions (non-gap characters) that the primer spans
				minPos = 0;
				for(int i=0;i<alnLength;i++){
					if(temp[i] != '-'){
						minPos++;
					}
				}
			}
			else if(numDiff == minDiff){
				minCount++;
			}
			
		}
		
		if(minDiff > pdiffs)	{	success = minDiff;		}	//no good matches
		else if(minCount > 1)	{	success = pdiffs + 10;	}	//can't tell the difference between multiple primers
		else{													//use the best match
			group = minGroup;
			seq.setUnaligned(rawSequence.substr(minPos));
			success = minDiff;
		}
		
		delete alignment;	//deleting NULL is a no-op
		
		return success;
		
	}
	catch(exception& e) {
		m->errorOut(e, "TrimOligos", "stripForward");
		exit(1);
	}
}
//*******************************************************************/
// Looks for one of the configured forward primers at the start of a read and strips it off.
//
// seq   - read to examine; when a primer is accepted, its unaligned string is replaced by the
//         part that follows the primer
// qual  - quality scores belonging to the read; when qual has a name, trimQScores() is called with
//         the number of stripped positions (presumably dropping the leading scores - confirm
//         against QualityScores::trimQScores)
// group - set to the value stored with the accepted primer; left untouched otherwise
//
// Returns 0 for an exact match and the number of differences (at most pdiffs) for an accepted
// inexact match. Any value greater than pdiffs means failure:
//   pdiffs + 1    no exact match and pdiffs is 0
//   pdiffs + 10   pdiffs is 0 and the read is shorter than a primer, or several primers are
//                 equally close
//   pdiffs + 100  pdiffs is not 0 and the read is shorter than the longest primer
//   minDiff       the closest primer needs more than pdiffs differences
//   1000000       there are no primers to compare against (and pdiffs is not 0)
// NOTE(review): stripBarcode uses +10 for "too short" and +100 for "ambiguous" in its alignment
// pass; here the two codes are the other way round. Kept as written - confirm which is intended.
int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group){
	try {
		string rawSequence = seq.getUnaligned();
		int success = pdiffs + 1;	//guilty until proven innocent
		
		//first pass: can you find the primer exactly
		for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
			string oligo = it->first;
			if(rawSequence.length() < oligo.length()){	//let's just assume that the primers are the same length
				success = pdiffs + 10;					//if the sequence is shorter than the primer then bail out
				break;	
			}
			
			if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
				group = it->second;
				seq.setUnaligned(rawSequence.substr(oligo.length()));
				if(qual.getName() != ""){
					qual.trimQScores(oligo.length(), -1);
				}
				success = 0;
				break;
			}
		}
		
		//if you found the primer or if you don't want to allow for diffs
		if ((pdiffs == 0) || (success == 0)) { return success;  }
		
		//second pass: try aligning and see if you can find it
		int maxLength = 0;
		for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
			int oligoLength = it->first.length();
			if(oligoLength > maxLength){ maxLength = oligoLength; }
		}
		
		//A read shorter than the longest primer is rejected. This test does not depend on the primer
		//being aligned, so it is made once, before the aligner is allocated. Returning here keeps the
		//"too short" code from being replaced by the 1000000 sentinel further down.
		if(rawSequence.length() < static_cast<string::size_type>(maxLength)){ return pdiffs + 100; }
		
		//without primers the loop below never runs, so no aligner is needed
		Alignment* alignment = NULL;
		if (primers.size() > 0) {
			alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));
		}
		
		int minDiff = 1000000;	//sentinel: larger than any real number of differences
		int minCount = 1;		//how many primers share the smallest number of differences
		int minGroup = -1;
		int minPos = 0;			//number of read positions covered by the best primer
		
		for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
			string oligo = it->first;
			
			//use needleman to align first primer.length()+pdiffs of sequence to each primer
			alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
			oligo = alignment->getSeqAAln();
			string temp = alignment->getSeqBAln();
			
			//cut both aligned strings after the last non-gap position of the primer
			int alnLength = oligo.length();
			
			for(int i=oligo.length()-1;i>=0;i--){
				if(oligo[i] != '-'){	alnLength = i+1;	break;	}
			}
			oligo = oligo.substr(0,alnLength);
			temp = temp.substr(0,alnLength);
			
			int numDiff = countDiffs(oligo, temp);
			
			if(numDiff < minDiff){
				minDiff = numDiff;
				minCount = 1;
				minGroup = it->second;
				
				//count the read positions (non-gap characters) that the primer spans
				minPos = 0;
				for(int i=0;i<alnLength;i++){
					if(temp[i] != '-'){
						minPos++;
					}
				}
			}
			else if(numDiff == minDiff){
				minCount++;
			}
			
		}
		
		if(minDiff > pdiffs)	{	success = minDiff;		}	//no good matches
		else if(minCount > 1)	{	success = pdiffs + 10;	}	//can't tell the difference between multiple primers
		else{													//use the best match
			group = minGroup;
			seq.setUnaligned(rawSequence.substr(minPos));
			if(qual.getName() != ""){
				qual.trimQScores(minPos, -1);
			}
			success = minDiff;
		}
		
		delete alignment;	//deleting NULL is a no-op
		
		return success;
		
	}
	catch(exception& e) {
		m->errorOut(e, "TrimOligos", "stripForward");
		exit(1);
	}
}
Exemple #3
0
//**********************************************************************************************************************
// Aligns the sequences found in one slice of a fasta file.
//
// Everything the function needs arrives through params: the input file name and slice
// (inputFilename, filePos), the template database, the alignment settings, and the three writers
// that receive the aligned sequences (alignWriter), the report lines (reportWriter) and the names of
// sequences that fell below the threshold (accnosWriter).
//
// Results are added to params: numSeqs grows by the number of named sequences processed,
// flippedResults[1] by the number of sequences below the threshold, and flippedResults[0] by the
// number of those for which the reverse complement was used.
void alignDriver(alignStruct* params) {
	try {
		NastReport report;
		
		ifstream inFASTA;
		params->util.openInputFile(params->inputFilename, inFASTA);

		inFASTA.seekg(params->filePos.start);

		long long count = 0;
		long long numFlipped_0 = 0;	//reverse complement was used
		long long numFlipped_1 = 0;	//sequence fell below the threshold
		
		//moved this into driver to avoid deep copies in windows parallelized version
		Alignment* alignment;
		int longestBase = params->templateDB->getLongestBase();
		if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: template longest base = "  + toString(longestBase) + " \n"); }
		if(params->alignMethod == "gotoh")			{	alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, longestBase);			}
		else if(params->alignMethod == "needleman")	{	alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, longestBase);				}
		else if(params->alignMethod == "blast")		{	alignment = new BlastAlignment(params->gapOpen, params->gapExtend, params->match, params->misMatch);		}
		else if(params->alignMethod == "noalign")		{	alignment = new NoAlign();													}
		else {
			params->m->mothurOut(params->alignMethod + " is not a valid alignment option. I will run the command using needleman.");
			params->m->mothurOutEndLine();
			alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, longestBase);
		}
	
		//the loop is left only through the break statements below
		while (true) {
			
			if (params->m->getControl_pressed()) {  break; }
			
			Sequence* candidateSeq = new Sequence(inFASTA);  params->util.gobble(inFASTA);
			report.setCandidate(candidateSeq);

			int origNumBases = candidateSeq->getNumBases();
			string originalUnaligned = candidateSeq->getUnaligned();
			int numBasesNeeded = origNumBases * params->threshold;
	
			if (candidateSeq->getName() != "") { //in case there is a commented sequence at the end of a file
				//grow the alignment matrix when this read does not fit
				if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) {
					if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); }
					alignment->resize(candidateSeq->getUnaligned().length()+2);
				}
				
				float searchScore = 0;	//presumably filled in by findClosestSequence - confirm against the template database class
				Sequence temp = params->templateDB->findClosestSequence(candidateSeq, searchScore);
				Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());
								
				Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
		
				//nast does not make a copy of the sequence passed, and it is used by the reporter below,
				//so the copy cannot be deleted until after the report. It may also never be created,
				//so needToDeleteCopy records whether there is something to delete.
				Sequence* copy = NULL;
				Nast* nast2 = NULL;
				bool needToDeleteCopy = false;
												
				//if there is a possibility that this sequence should be reversed
				if (candidateSeq->getNumBases() < numBasesNeeded) {
					numFlipped_1++;
					string wasBetter =  "";
					//if the user wants you to try the reverse
					if (params->flip) {
				
						//get reverse complement
						copy = new Sequence(candidateSeq->getName(), originalUnaligned);
						copy->reverseComplement();
						
						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: flipping "  + candidateSeq->getName() + " \n"); }
						
						//rerun alignment
						Sequence temp2 = params->templateDB->findClosestSequence(copy, searchScore);
						Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());
						
						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: closest template "  + temp2.getName() + " \n"); }
						
						nast2 = new Nast(alignment, copy, templateSeq2);
						
						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: completed Nast2 "  + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); }
			
						//check if any better
						if (copy->getNumBases() > candidateSeq->getNumBases()) {
							candidateSeq->setAligned(copy->getAligned());  //use reverse complement's alignment since it's better
							delete templateSeq;
							templateSeq = templateSeq2;
							delete nast;
							nast = nast2;
							needToDeleteCopy = true;
							wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
							numFlipped_0++;
						}else{  
							wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
							delete nast2;
							delete templateSeq2;
							delete copy;	
						}
						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: done.\n"); }
					}
					
					//create accnos file with names
					params->accnosWriter->write(candidateSeq->getName() + wasBetter + "\n");
				}
				
				report.setTemplate(templateSeq);
				report.setSearchParameters(params->search, searchScore);
				report.setAlignmentParameters(params->alignMethod, alignment);
				report.setNastParameters(*nast);
	
				params->alignWriter->write('>' + candidateSeq->getName() + '\n' + candidateSeq->getAligned() + '\n');
				params->reportWriter->write(report.getReport());
				delete nast;
				delete templateSeq;
				if (needToDeleteCopy) {   delete copy;   }
				
				count++;
			}
			delete candidateSeq;
			
			//Stop at the end of this slice. On non-windows builds filePos.end is compared with the stream
			//offset; otherwise it is compared with the number of sequences processed.
			#if defined NON_WINDOWS
				unsigned long long pos = inFASTA.tellg();
				if ((pos == -1) || (pos >= params->filePos.end)) { break; }
			#else
				if (count == params->filePos.end) { break; }
			#endif
			
			//report progress
			if((count) % 1000 == 0){	params->m->mothurOutJustToScreen(toString(count) + "\n"); 		}
			
		}
		//report progress
		if((count) % 1000 != 0){	params->m->mothurOutJustToScreen(toString(count) + "\n"); 		}
		
		params->numSeqs += count;
		params->flippedResults[0] += numFlipped_0;
		params->flippedResults[1] += numFlipped_1;
		
		delete alignment;
		inFASTA.close();
		
	}
	catch(exception& e) {
		//name this function in the error output (it used to say "driver")
		params->m->errorOut(e, "AlignCommand", "alignDriver");
		exit(1);
	}
}
Exemple #4
0
// Aligns `num` sequences read from a shared MPI input file and writes the results to the shared
// MPI output files.
//
// start      - index into MPIPos of the first sequence handled by this call
// num        - number of sequences to handle
// inMPI      - input fasta file
// alignFile  - receives the aligned sequences in fasta format
// reportFile - receives the report lines; the process with rank 0 also writes the header
// accnosFile - receives the names of sequences that fell below the threshold, plus a note on
//              whether the reverse complement was used
// MPIPos     - file offsets; sequence k occupies the bytes from MPIPos[k] up to MPIPos[k+1]
//
// Returns 1 when all sequences were handled and 0 when the run was interrupted (control_pressed).
int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector<unsigned long long>& MPIPos){
	try {
		string outputString = "";
		MPI_Status statusReport; 
		MPI_Status statusAlign; 
		MPI_Status statusAccnos; 
		MPI_Status status; 
		int pid;
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
	
		NastReport report;
		
		//only the process with rank 0 writes the report header
		if (pid == 0) {
			outputString = report.getHeaders();
			int headerLength = outputString.length();
			
			char* buf = new char[headerLength];
			memcpy(buf, outputString.c_str(), headerLength);
		
			MPI_File_write_shared(reportFile, buf, headerLength, MPI_CHAR, &statusReport);

			delete[] buf;	//array form: the buffer comes from new[]
		}
		
		Alignment* alignment;
		int longestBase = templateDB->getLongestBase();
		if(align == "gotoh")			{	alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);			}
		else if(align == "needleman")	{	alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);				}
		else if(align == "blast")		{	alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);		}
		else if(align == "noalign")		{	alignment = new NoAlign();													}
		else {
			m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
			m->mothurOutEndLine();
			alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
		}
		
		
		for(int i=0;i<num;i++){
		
			if (m->control_pressed) { delete alignment; return 0; }

			//read next sequence
			int length = MPIPos[start+i+1] - MPIPos[start+i];

			//One extra, zero-filled byte guarantees the buffer is NUL-terminated before it is turned
			//into a string. Without it the conversion would read past the end of the allocation.
			char* buf4 = new char[length+1]();

			MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
			
			string tempBuf = buf4;

			delete[] buf4;
	
			istringstream iss (tempBuf,istringstream::in);

			Sequence* candidateSeq = new Sequence(iss);  
			report.setCandidate(candidateSeq);

			int origNumBases = candidateSeq->getNumBases();
			string originalUnaligned = candidateSeq->getUnaligned();
			int numBasesNeeded = origNumBases * threshold;
	
			if (candidateSeq->getName() != "") { //in case there is a commented sequence at the end of a file
				//same guard and margin as AlignCommand::driver
				if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) {
					alignment->resize(candidateSeq->getUnaligned().length()+2);
				}
								
				//The template is kept on the heap, as in AlignCommand::driver. Nast and the report hold
				//the pointer, so it must not refer to a local that can go out of scope before they are done.
				Sequence temp = templateDB->findClosestSequence(candidateSeq);
				Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());
				
				float searchScore = templateDB->getSearchScore();
								
				Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
				
				//nast does not make a copy of the sequence passed, and it is used by the reporter below,
				//so the copy cannot be deleted until after the report. It may also never be created,
				//so needToDeleteCopy records whether there is something to delete.
				Sequence* copy = NULL;
				Nast* nast2 = NULL;
				bool needToDeleteCopy = false;
												
				//if there is a possibility that this sequence should be reversed
				if (candidateSeq->getNumBases() < numBasesNeeded) {
					
					string wasBetter = "";
					//if the user wants you to try the reverse
					if (flip) {
						//get reverse complement
						copy = new Sequence(candidateSeq->getName(), originalUnaligned);
						copy->reverseComplement();
						
						//rerun alignment
						Sequence temp2 = templateDB->findClosestSequence(copy);
						Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());
						
						searchScore = templateDB->getSearchScore();
						
						nast2 = new Nast(alignment, copy, templateSeq2);
			
						//check if any better
						if (copy->getNumBases() > candidateSeq->getNumBases()) {
							candidateSeq->setAligned(copy->getAligned());  //use reverse complement's alignment since it's better
							delete templateSeq;
							templateSeq = templateSeq2; 
							delete nast;
							nast = nast2;
							needToDeleteCopy = true;
							wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
						}else{  
							wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
							delete nast2;
							delete templateSeq2;
							delete copy;	
						}
					}
					
					//create accnos file with names
					outputString = candidateSeq->getName() + wasBetter + "\n";
					
					//send results to parent
					int accnosLength = outputString.length();

					char* buf = new char[accnosLength];
					memcpy(buf, outputString.c_str(), accnosLength);
				
					MPI_File_write_shared(accnosFile, buf, accnosLength, MPI_CHAR, &statusAccnos);
					delete[] buf;
					MPIWroteAccnos = true;
				}
				
				report.setTemplate(templateSeq);
				report.setSearchParameters(search, searchScore);
				report.setAlignmentParameters(align, alignment);
				report.setNastParameters(*nast);
	
				outputString =  ">" + candidateSeq->getName() + "\n" + candidateSeq->getAligned() + "\n";
				
				//send results to parent
				int outLength = outputString.length();
				char* buf2 = new char[outLength];
				memcpy(buf2, outputString.c_str(), outLength);
				
				MPI_File_write_shared(alignFile, buf2, outLength, MPI_CHAR, &statusAlign);
				
				delete[] buf2;

				outputString = report.getReport();
				
				//send results to parent
				outLength = outputString.length();
				char* buf3 = new char[outLength];
				memcpy(buf3, outputString.c_str(), outLength);
				
				MPI_File_write_shared(reportFile, buf3, outLength, MPI_CHAR, &statusReport);
				
				delete[] buf3;
				
				//the report is written, so the helpers can go
				delete nast;
				delete templateSeq;
				if (needToDeleteCopy) {   delete copy;   }
			}
			delete candidateSeq;
			
			//report progress
			if((i+1) % 100 == 0){	cout << (toString(i+1)) << endl;		}
		}
		//report progress
		if((num) % 100 != 0){	cout << (toString(num)) << endl;		}
		
		delete alignment;	//was leaked on the normal return path
		
		return 1;
	}
	catch(exception& e) {
		m->errorOut(e, "AlignCommand", "driverMPI");
		exit(1);
	}
}
//**********************************************************************************************************************
// Cuts a sequence down to numbases positions, keeping either its front or its back.
//
// seq        - sequence to chop; its aligned string is the one that is cut and returned
// qualValues - only written when countGaps is false: receives a line of the form
//              "name<TAB>number<TAB>number<NEWLINE>" built from the cut position
//              (NOTE(review): the consumer of this line is not visible here - confirm the meaning
//              of the two numbers against it)
//
// Behaviour depends on these members:
//   countGaps - true: every character of the aligned string counts towards numbases;
//               false: only alphabetic characters count, and the length test uses the unaligned string
//   keep      - "front" keeps the beginning, anything else keeps the end
//   keepN     - false: every 'N'/'n' that is visited is replaced by '.'
//   Short     - true: a sequence that is not longer than numbases is returned unchanged;
//               false: such a sequence is returned as an empty string
//
// Returns the chopped aligned string, or an empty string when the sequence is rejected.
string ChopSeqsCommand::getChopped(Sequence seq, string& qualValues) {
	try {
		string temp = seq.getAligned();
		string tempUnaligned = seq.getUnaligned();
		
		if (countGaps) {
			//if needed trim sequence
			if (keep == "front") {//you want to keep the beginning
				int tempLength = temp.length();

				if (tempLength > numbases) { //you have enough bases to remove some
				
					int stopSpot = 0;
					int numBasesCounted = 0;
					
					for (int i = 0; i < temp.length(); i++) {
						//eliminate N's
						if (!keepN) { if (toupper(temp[i]) == 'N') { temp[i] = '.'; } }
						
						numBasesCounted++; 
						
						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}
					
					//NOTE(review): stopSpot == 0 doubles as "nothing found", so numbases == 1 yields "" here
					if (stopSpot == 0) { temp = ""; }
					else {  temp = temp.substr(0, stopSpot+1);  }
							
				}else { 
					if (!Short) { temp = ""; } //sequence too short
				}
			}else { //you are keeping the back
				int tempLength = temp.length();
				if (tempLength > numbases) { //you have enough bases to remove some
					
					int stopSpot = 0;
					int numBasesCounted = 0;
					
					for (int i = (temp.length()-1); i >= 0; i--) {
						//eliminate N's
						if (!keepN) { if (toupper(temp[i]) == 'N') { temp[i] = '.'; } }
						
						numBasesCounted++; 

						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}
				
					//stopSpot is the index of the first position to keep, so the cut starts AT it. Starting at
					//stopSpot+1 kept one position fewer than the front branch keeps for the same numbases.
					//The numbases < 1 guard keeps the result empty for a non-positive numbases, as before.
					if (stopSpot == 0 || numbases < 1) { temp = ""; }
					else {  temp = temp.substr(stopSpot);  }
				}else { 
					if (!Short) { temp = ""; } //sequence too short
				}
			}

		}else{
				
			//if needed trim sequence
			if (keep == "front") {//you want to keep the beginning
				int tempLength = tempUnaligned.length();

				if (tempLength > numbases) { //you have enough bases to remove some
					
					int stopSpot = 0;
					int numBasesCounted = 0;
					
					for (int i = 0; i < temp.length(); i++) {
						//eliminate N's
						if (!keepN) {
							if (toupper(temp[i]) == 'N') {
								temp[i] = '.';
								tempLength--;	//a removed N no longer counts as a base
								if (tempLength < numbases) { stopSpot = 0; break; } //too few bases left
							}
						}
						if(isalpha(temp[i])) { numBasesCounted++; }
						
						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}
					
					if (stopSpot == 0) { temp = ""; }
					else {  temp = temp.substr(0, stopSpot+1);  }
					
					qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(stopSpot+1) + '\n';
					
				}else { 
					if (!Short) { temp = ""; qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(0) + '\n'; } //sequence too short
					else { qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(tempLength) + '\n'; }
				}				
			}else { //you are keeping the back
				int tempLength = tempUnaligned.length();
				if (tempLength > numbases) { //you have enough bases to remove some
					
					int stopSpot = 0;
					int numBasesCounted = 0;
					
					for (int i = (temp.length()-1); i >= 0; i--) {
						if (!keepN) {
							//eliminate N's
							if (toupper(temp[i]) == 'N') {
								temp[i] = '.';
								tempLength--;	//a removed N no longer counts as a base
								if (tempLength < numbases) { stopSpot = 0; break; } //too few bases left
							}
						}
						if(isalpha(temp[i])) { numBasesCounted++; }

						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}
				
					if (stopSpot == 0) { temp = ""; }
					else {  temp = temp.substr(stopSpot);  }
					
					//NOTE(review): temp has already been cut here, so temp.length()-1 is relative to the chopped
					//string, and it wraps around to a huge unsigned value when temp is empty. Left unchanged
					//because the expected meaning of this field cannot be confirmed from this function.
					qualValues = seq.getName() +'\t' + toString(stopSpot) + '\t' + toString(temp.length()-1) + '\n';
					
				}else { 
					if (!Short) { temp = ""; qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(0) + '\n'; } //sequence too short
					else { qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(tempLength) + '\n'; }
				}
			}
		}
		
		return temp;
	}
	catch(exception& e) {
		m->errorOut(e, "ChopSeqsCommand", "getChopped");
		exit(1);
	}
}