Exemple #1
0
//Trims the sequence at the first base whose quality score is below qThreshold.
//The bases before that position are kept; when every score passes nothing is removed.
//A name mismatch between the sequence and the stored quality record is reported but
//does not stop the trim. The stored scores are trimmed with trimQScores(-1, end).
//Always returns true.
bool QualityScores::stripQualThreshold(Sequence& sequence, double qThreshold){
	try {
		string rawSequence = sequence.getUnaligned();
		int seqLength = sequence.getNumBases();
		
		if(seqName != sequence.getName()){
			m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
			m->mothurOutEndLine();	
		}
		
		//start from "keep everything" so an empty sequence and an all-passing
		//sequence need no special case after the loop
		int end = seqLength;
		for(int i=0;i<seqLength;i++){
			if(qScores[i] < qThreshold){
				end = i;	//the failing base itself is not kept, even when it is the last one
				break;
			}
		}
		
		sequence.setUnaligned(rawSequence.substr(0,end));
		trimQScores(-1, end);
		
		return 1;
	}
	catch(exception& e) {
		m->errorOut(e, "QualityScores", "stripQualThreshold");
		exit(1);
	}							
	
}
Exemple #2
0
//Builds a fastq record from a sequence and its quality scores; f is stored as the format.
//When the two names differ an error is printed, control_pressed is set and the
//record fields (name, comment, sequence, scores, scoreString) are not filled in.
FastqRead::FastqRead(Sequence s, QualityScores q, string f) {
    try {
        m = MothurOut::getInstance();
        format = f;
        
        //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
        //both tables receive the same value for each of the 129 entries (-64 .. 64)
        for (int k = -64; k < 65; k++) {
            int converted = (int)(33 + 10*log(1+pow(10,(k/10.0)))/log(10)+0.499);
            convertTable.push_back((char) converted);
            convertBackTable.push_back(converted);
        }
        
        if (s.getName() == q.getName()) {
            name = s.getName();
            comment = s.getComment();
            sequence = s.getUnaligned();
            scores = q.getScores();
            scoreString = convertQual(scores);
        }
        else {
            m->mothurOut("[ERROR]: sequence name does not match quality score name. Cannot construct fastq object.\n");
            m->control_pressed = true;
        }
    }
    catch(exception& e) {
        m->errorOut(e, "FastqRead", "FastqRead");
        exit(1);
    }
}
int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
	try {
				
		MPI_Status status; 
		int pid;
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
		
		for(int i=0;i<num;i++){
			
			if (m->control_pressed) {	return 1;	}
			
			//read next sequence
			int length = MPIPos[start+i+1] - MPIPos[start+i];
	
			char* buf4 = new char[length];
			MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
			
			string tempBuf = buf4;
			if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
			istringstream iss (tempBuf,istringstream::in);
			delete buf4;

			Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
				
			if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
				
				if	(candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
					m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
				}else{
					//find chimeras
					chimera->getChimeras(candidateSeq);
					
					if (m->control_pressed) {	delete candidateSeq; return 1;	}
		
					//print results
					chimera->print(outMPI, outAccMPI);
				}
			}
			delete candidateSeq;
			
			//report progress
			if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;			}
		}
		//report progress
		if(num % 100 != 0){		cout << "Processing sequence: " << num << endl;	 	}
		
				
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
		exit(1);
	}
}
Exemple #4
0
//Trims the sequence at the first sliding window whose average quality score is below qThreshold.
//Windows are windowSize scores wide and advance by stepSize scores.
//Returns false without modifying the sequence when stepSize is not positive, when the
//sequence is shorter than one window, or when the very first window fails.
//Otherwise the sequence and the stored scores are trimmed and true is returned.
bool QualityScores::stripQualWindowAverage(Sequence& sequence, int stepSize, int windowSize, double qThreshold){
	try {
		string rawSequence = sequence.getUnaligned();
		int seqLength = sequence.getNumBases();
		
		if(seqName != sequence.getName()){
			m->mothurOut("sequence name mismatch between fasta: " + sequence.getName() + " and qual file: " + seqName);
			m->mothurOutEndLine();
		}
		
		//a step of zero never advances the window and a negative step walks off the front of qScores
		if(stepSize < 1) {
			m->mothurOut("[ERROR]: stepSize must be greater than 0 to trim by window average.");
			m->mothurOutEndLine();
			return 0;
		}
		
		if(seqLength < windowSize) {	return 0;	}
		
		int end = windowSize;
		int start = 0;
			
		while((start+windowSize) < seqLength){
			double windowSum = 0.0000;

			for(int i=start;i<end;i++){
				windowSum += qScores[i];
			}
			double windowAverage = windowSum / (double)(end-start);
				
			if(windowAverage < qThreshold){
				//cut back to where the previous window ended
				end = end - stepSize;
				break;
			}
			
			start += stepSize;
			end = start + windowSize;
				
			if(end >= seqLength){	end = seqLength;	}
				
		}
		
		//failed first window (end was pulled back below one full window)
		if (end < windowSize) { return 0; }
			
		sequence.setUnaligned(rawSequence.substr(0,end));
		trimQScores(-1, end);
		
		return 1;
	}
	catch(exception& e) {
		m->errorOut(e, "QualityScores", "stripQualWindowAverage");
		exit(1);
	}							
	
}
// Expands a sequence containing ambiguous states into every combination of the states they stand for.
// seq:   the sequence to expand.
// hap:   receives the new sequences; they are allocated with new here and never freed in this function.
// level: a position is expanded only when its alias list has at most this many states;
//        otherwise the original state is kept as it is.
// A position whose alias list is empty is given the gap character code.
void SequenceTools::getPutativeHaplotypes(const Sequence& seq, std::vector<Sequence*>& hap, unsigned int level)
{
  vector< vector< int > > states(seq.size());
  list<Sequence*> t_hap;
  const Alphabet* alpha = seq.getAlphabet();
  unsigned int hap_count = 1;
  // Vector of available states at each position
  for (size_t i = 0; i < seq.size(); i++)
  {
    vector<int> st = alpha->getAlias(seq[i]);
    if (!st.size())
    {
      st.push_back(alpha->getGapCharacterCode());
    }
    if (st.size() <= level)
    {
      states[i] = st;
    }
    else
    {
      states[i] = vector<int>(1, seq[i]);
    }
  }
  // Combinatorial haplotypes building (the use of tree may be more accurate)
  t_hap.push_back(new BasicSequence(seq.getName() + "_hap" + TextTools::toString(hap_count++), "", alpha));
  for (size_t i = 0; i < states.size(); i++)
  {
    for (list<Sequence*>::iterator it = t_hap.begin(); it != t_hap.end(); it++)
    {
      for (unsigned int j = 0; j < states[i].size(); j++)
      {
        if (j < states[i].size() - 1)
        {
          // Every state but the last one forks a copy of the current haplotype.
          // The copy is created only here, so nothing is allocated and dropped on the last state.
          Sequence* tmp_seq = new BasicSequence(seq.getName() + "_hap", (**it).getContent(), alpha);
          tmp_seq->setName(tmp_seq->getName() + TextTools::toString(hap_count++));
          tmp_seq->addElement(states[i][j]);
          // Inserted before 'it', so the fork is not visited again for this position
          t_hap.insert(it, tmp_seq);
        }
        else
        {
          // The last state extends the current haplotype in place
          (**it).addElement(states[i][j]);
        }
      }
    }
  }
  // Hand the haplotypes over in reverse list order
  for (list<Sequence*>::reverse_iterator it = t_hap.rbegin(); it != t_hap.rend(); it++)
  {
    hap.push_back(*it);
  }
}
// Looks for open reading frames on the given strand in the three phases.
// Every start codon is paired with the first stop codon found after it in the same phase,
// and a "CDS" feature running from the start position to the last base of the stop codon
// is added to featSet. Several starts may share the same stop.
// Throws AlphabetException when the sequence alphabet is not nucleic.
// Returns the number of features added.
unsigned int SequenceFeatureTools::getOrfs(const Sequence& seq, SequenceFeatureSet& featSet, const GeneticCode& gCode)
{
  if (! AlphabetTools::isNucleicAlphabet(seq.getAlphabet())) {
    throw AlphabetException("SequenceFeatureTools::getOrfs: Sequence alphabet must be nucleic!", seq.getAlphabet());
  }
  unsigned int orfCpt = 0;
  const CodonAlphabet* codonAlpha = gCode.getSourceAlphabet();
  std::vector< std::vector<size_t> > starts(3), stops(3);
  size_t phase = 0;
  // 'p + 2 < size' rather than 'p < size - 2': the subtraction wraps around on size_t
  // for sequences shorter than 2 and would read far past the end.
  for (size_t p = 0 ; p + 2 < seq.size() ; p++) {
    phase = p % 3;
    if (gCode.isStart(codonAlpha->getCodon(seq.getValue(p), seq.getValue(p + 1), seq.getValue(p + 2)))) {
      starts[phase].push_back(p);
      //std::cerr << "Start: " << p << " (" << phase << ")" << std::endl;
    } else if (gCode.isStop(codonAlpha->getCodon(seq.getValue(p), seq.getValue(p + 1), seq.getValue(p + 2)))) {
      stops[phase].push_back(p);
      //std::cerr << "Stop:  " << p << " (" << phase << ")" << std::endl;
    }
  }
  for (size_t i = 0 ; i < 3 ; ++i) {
    // Both lists are in increasing position order, so one merge-like pass pairs them
    std::vector< size_t >::iterator start(starts[i].begin()), stop(stops[i].begin());
    while (stop != stops[i].end() && start != starts[i].end()) {
      if (*stop < *start) {
        // This stop lies before the current start: it cannot close it
        stop++;
      } else {
        orfCpt++;
        //std::cerr << "ORF:  " << *start << " - " << *stop + 2 << " (" << i << ")" << std::endl;
        bpp::BasicSequenceFeature feat("", seq.getName(), "Bio++", "CDS", *start, *stop + 2, '+');
        featSet.addFeature(feat);
        start++;
      }
    }
  }
  return orfCpt;
}
//Runs the chimera check on the sequences of filename, starting at filePos->start, and prints to outputFName.
//On the unix-like branch reading stops once the stream position reaches filePos->end or tellg fails;
//on the other branch it stops at end of file.
//Returns the number of named sequences processed, or 1 when control_pressed is seen.
int ChimeraCheckCommand::driver(linePair* filePos, string outputFName, string filename){
	try {
		ofstream out;
		m->openOutputFile(outputFName, out);
		
		ofstream out2;	//never opened in this function, only handed to chimera->print
		
		ifstream inFASTA;
		m->openInputFile(filename, inFASTA);
		inFASTA.seekg(filePos->start);

		int count = 0;
	
		for (;;) {

			if (m->control_pressed) {	return 1;	}
		
			Sequence* candidateSeq = new Sequence(inFASTA);
			m->gobble(inFASTA);
			
			//an empty name means there was a commented sequence at the end of the file
			const bool hasName = (candidateSeq->getName() != "");
			if (hasName) {
				chimera->getChimeras(candidateSeq);
				
				if (m->control_pressed) {
					delete candidateSeq;
					return 1;
				}
	
				chimera->print(out, out2);
				count++;
			}
			delete candidateSeq;
			
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				unsigned long long pos = inFASTA.tellg();
				if ((pos == -1) || (pos >= filePos->end)) { break; }
			#else
				if (inFASTA.eof()) { break; }
			#endif
			
			//progress line every 100 sequences
			if (count % 100 == 0) {
				m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");
			}
		}
		
		//final progress line unless the loop just printed it
		if (count % 100 != 0) {
			m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");
		}
		
		out.close();
		inFASTA.close();
				
		return count;
	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraCheckCommand", "driver");
		exit(1);
	}
}
// Replaces the sequence stored at index pos by the given one.
// Throws IndexOutOfBoundsException when pos is not an existing index,
// AlphabetMismatchException when the alphabet types differ, and SequenceException
// when the length does not match the number of sites or, with checkNames set,
// when the name is already used at another index.
void VectorSiteContainer::setSequence(size_t pos, const Sequence& sequence, bool checkNames)
throw (Exception)
{
  if (pos >= getNumberOfSequences())
    throw IndexOutOfBoundsException("VectorSiteContainer::setSequence", pos, 0, getNumberOfSequences() - 1);

  // New sequence's alphabet and site container's alphabet matching verification
  if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
    throw AlphabetMismatchException("VectorSiteContainer::setSequence", getAlphabet(), sequence.getAlphabet());

  // If the container has only one sequence, we set the size to the size of this sequence:
  if (getNumberOfSequences() == 1)
    realloc(sequence.size());

  if (sequence.size() != sites_.size())
    throw SequenceException("VectorSiteContainer::setSequence. Sequence has not the appropriate length.", &sequence);

  if (checkNames)
  {
    // The slot being replaced is allowed to keep its own name
    for (size_t i = 0; i < names_.size(); i++)
    {
      if (i != pos && sequence.getName() == names_[i])
        throw SequenceException("VectorSiteContainer::setSequence. Name already exists in container.", &sequence);
    }
  }
  // Update name:
  names_[pos] = sequence.getName();
  // Update elements at each site:
  for (size_t i = 0; i < sites_.size(); i++)
  {
    sites_[i]->setElement(pos, sequence.getValue(i));
  }
  // Update comments:
  if (comments_[pos])
    delete comments_[pos];
  comments_[pos] = new Comments(sequence.getComments());
  // Update sequences: drop any Sequence object stored for this index
  if (sequences_[pos])
    delete sequences_[pos];
  sequences_[pos] = 0;
}
Exemple #9
0
//Tells whether the average quality score returned by calculateAverage() reaches qAverage.
//The sequence is only used for the name check; a mismatch is reported but does not change the result.
//Returns true when the average is at least qAverage, false otherwise.
bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage){
	try {
		if(seqName != sequence.getName())	{
			m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
			m->mothurOutEndLine();	
		} 
			
		double aveQScore = calculateAverage();
		
		return (aveQScore >= qAverage);
	}
	catch(exception& e) {
		m->errorOut(e, "QualityScores", "cullQualAverage");
		exit(1);
	}
}
Exemple #10
0
//Trims the sequence at the first position where the running average of the quality
//scores, taken from the first base up to and including that position, drops below qThreshold.
//The base at that position is not kept. When the running average never drops, nothing is removed.
//A name mismatch is reported but does not stop the trim. Always returns true.
bool QualityScores::stripQualRollingAverage(Sequence& sequence, double qThreshold){
	try {
		string rawSequence = sequence.getUnaligned();
		int seqLength = sequence.getNumBases();
		
		if(seqName != sequence.getName()){
			m->mothurOut("sequence name mismatch btwn fasta: " + sequence.getName() + " and qual file: " + seqName);
			m->mothurOutEndLine();	
		}
		
		int end = -1;	//-1 means the average never fell below the threshold
		double rollingSum = 0.0000;
		
		for(int i=0;i<seqLength;i++){

			rollingSum += (double)qScores[i];
			
			if(rollingSum / (double)(i+1) < qThreshold){
				end = i;
				break;
			}
		}
		
		if(end == -1){	end = seqLength;	}
		
		
		sequence.setUnaligned(rawSequence.substr(0,end));
		trimQScores(-1, end);
		
		
		return 1;
	}
	catch(exception& e) {
		m->errorOut(e, "QualityScores", "stripQualRollingAverage");
		exit(1);
	}							
	
}
// Appends a sequence at the end of the container.
// Throws AlphabetMismatchException when the alphabet types differ, and SequenceException
// when the length does not match the number of sites or, with checkNames set,
// when the name is already present.
void VectorSiteContainer::addSequence(const Sequence& sequence, bool checkNames) throw (Exception)
{
  // New sequence's alphabet and site container's alphabet matching verification.
  // Done first so that a rejected sequence never triggers the reallocation below.
  if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
    throw AlphabetMismatchException("VectorSiteContainer::addSequence", getAlphabet(), sequence.getAlphabet());

  // If the container has no sequence, we set the size to the size of this sequence:
  if (getNumberOfSequences() == 0)
    realloc(sequence.size());

  if (sequence.size() != sites_.size())
    throw SequenceException("VectorSiteContainer::addSequence. Sequence has not the appropriate length: " + TextTools::toString(sequence.size()) + ", should be " + TextTools::toString(sites_.size()) + ".", &sequence);

  if (checkNames)
  {
    for (size_t i = 0; i < names_.size(); i++)
    {
      if (sequence.getName() == names_[i])
        throw SequenceException("VectorSiteContainer::addSequence. Name already exists in container.", &sequence);
    }
  }

  // Append name:
  names_.push_back(sequence.getName());

  // Append elements at each site:
  for (size_t i = 0; i < sites_.size(); i++)
  {
    sites_[i]->addElement(sequence.getValue(i));
  }

  // Append comments:
  comments_.push_back(new Comments(sequence.getComments()));

  // Sequences pointers: no Sequence object is stored for the new entry
  sequences_.push_back(0);
}
// Inserts a sequence at index pos, before the sequence currently stored there.
// pos must be the index of an existing sequence.
// Throws IndexOutOfBoundsException when pos is not an existing index,
// SequenceNotAlignedException when the length does not match the number of sites,
// AlphabetMismatchException when the alphabet types differ, and SequenceException
// when checkNames is set and the name is already present.
void VectorSiteContainer::addSequence(
  const Sequence& sequence,
  size_t pos,
  bool checkNames)
throw (Exception)
{
  if (pos >= getNumberOfSequences())
    throw IndexOutOfBoundsException("VectorSiteContainer::addSequence.", pos, 0, getNumberOfSequences() - 1);
  if (sequence.size() != sites_.size())
    throw SequenceNotAlignedException("VectorSiteContainer::addSequence", &sequence);

  // New sequence's alphabet and site container's alphabet matching verification
  if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
  {
    throw AlphabetMismatchException("VectorSiteContainer::addSequence", getAlphabet(), sequence.getAlphabet());
  }

  if (checkNames)
  {
    for (size_t i = 0; i < names_.size(); i++)
    {
      if (sequence.getName() == names_[i])
        throw SequenceException("VectorSiteContainer::addSequence. Name already exists in container.", &sequence);
    }
  }

  for (size_t i = 0; i < sites_.size(); i++)
  {
    // For each site:
    sites_[i]->addElement(pos, sequence.getValue(i));
  }
  // Actualize names and comments:
  names_.insert(names_.begin() + pos, sequence.getName());
  comments_.insert(comments_.begin() + pos, new Comments(sequence.getComments()));
  // No Sequence object is stored for the new entry
  sequences_.insert(sequences_.begin() + pos, 0);
}
Exemple #13
0
//Appends seq to the stored sequences.
//A sequence that fails isAligned() is reported and clears the templateAligned flag; it is stored anyway.
//templateSeqsLength is taken from the aligned string of the sequence being added while it is still 0.
void DistanceDB::addSequence(Sequence seq) {
	try {
		//are the template sequences aligned
		const bool seqIsAligned = isAligned(seq.getAligned());
		if (seqIsAligned == false) {
			templateAligned = false;
			m->mothurOut(seq.getName() + " is not aligned. Sequences must be aligned to use the distance method.");
			m->mothurOutEndLine(); 
		}
		
		const bool lengthUnset = (templateSeqsLength == 0);
		if (lengthUnset) {
			templateSeqsLength = seq.getAligned().length();
		}
				
		data.push_back(seq);
	}
	catch(exception& e) {
		m->errorOut(e, "DistanceDB", "addSequence");
		exit(1);
	}	
}
Exemple #14
0
//**********************************************************************************************************************
//Classifies the sequences of params->filename, starting at params->start.
//Output lines are collected in three buffers and written through params->taxWriter,
//params->taxTWriter and params->accnosWriter every 100 sequences and once more at the end.
//With params->probs set the taxonomy file gets the full taxonomy string, otherwise simpleTax.
//Names of sequences reported as flipped by getTaxonomy go to the accnos buffer.
//params->count is incremented for every named sequence. Stops early when control is pressed.
void driverClassifier(classifyData* params){
    try {
        ifstream inFASTA;
        params->util.openInputFile(params->filename, inFASTA);
        
        string taxonomy;
        
        inFASTA.seekg(params->start);
        
        string taxBuffer = ""; string taxTBuffer = ""; string accnosBuffer = "";
        while (true) {
            if (params->m->getControl_pressed()) { break; }
            
            Sequence* candidateSeq = new Sequence(inFASTA); params->util.gobble(inFASTA);
            
            if (candidateSeq->getName() != "") {
                
                string simpleTax = ""; bool flipped = false;
                taxonomy = params->classify->getTaxonomy(candidateSeq, simpleTax, flipped);
                
                if (params->m->getControl_pressed()) { delete candidateSeq; break; }
                
                if (taxonomy == "unknown;") { params->m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences.\n");  }
                
                //output confidence scores or not
                if (params->probs)  { taxBuffer += candidateSeq->getName() + '\t' + taxonomy + '\n';    }
                else                { taxBuffer += candidateSeq->getName() + '\t' + simpleTax + '\n';   }
                
                if (flipped) { accnosBuffer += candidateSeq->getName() + '\n'; }
                
                //append, like the other buffers: the buffer is only written every 100 sequences,
                //so assigning here would drop every entry of the batch except the last
                taxTBuffer += candidateSeq->getName() + '\t' + simpleTax + '\n';
                
                params->count++;
            }
            delete candidateSeq;
            
            //report progress
            if((params->count) % 100 == 0){
                params->m->mothurOutJustToScreen(toString(params->count) +"\n");
                params->taxTWriter->write(taxTBuffer); taxTBuffer = "";
                params->taxWriter->write(taxBuffer); taxBuffer = "";
                if (accnosBuffer != "") { params->accnosWriter->write(accnosBuffer); accnosBuffer = ""; }
            }
            
#if defined NON_WINDOWS
            //params->end is compared with the stream position on this branch
            unsigned long long pos = inFASTA.tellg();
            if ((pos == -1) || (pos >= params->end)) { break; }
#else
            //params->end is compared with the sequence count on this branch
            if (params->count == params->end) { break; }
#endif
            
        }
        //report progress and write whatever is left from the last partial batch
        if((params->count) % 100 != 0){
            params->m->mothurOutJustToScreen(toString(params->count)+"\n");
            params->taxTWriter->write(taxTBuffer); taxTBuffer = "";
            params->taxWriter->write(taxBuffer); taxBuffer = "";
            if (accnosBuffer != "") { params->accnosWriter->write(accnosBuffer); accnosBuffer = ""; }
        }
        
        inFASTA.close();
    }
    catch(exception& e) {
        params->m->errorOut(e, "ClassifySeqsCommand", "driver");
        exit(1);
    }
}
Exemple #15
0
//***************************************************************************************************************
//gets closest matches to each end, since chimeras will most likely have different parents on each end
//The query is cut into a left piece (first third of its bases) and a right piece (last third).
//Each piece is compared with the same columns of every sequence in thisFilteredTemplate,
//and the nearest candidates from both sides are merged.
//thisTemplate is indexed with positions taken from thisFilteredTemplate, so the two vectors
//are assumed to be parallel - TODO confirm against callers.
//numWanted: candidates taken from each side before ties are considered.
//minSim: minimum similarity, used as a percentage when the matches are filtered.
//Returns copies of the matching template sequences, never the query itself.
//Returns an empty vector when there is no template or when control is pressed.
vector<Sequence> DeCalculator::findClosest(Sequence querySeq, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate, int numWanted, int minSim) {
	try {
		//indexes.clear();
		
		vector<Sequence> seqsMatches;  
		
		vector<SeqDist> distsLeft;
		vector<SeqDist> distsRight;
		
		Dist* distcalculator = new eachGapDist();
		
		string queryUnAligned = querySeq.getUnaligned();
		int numBases = int(queryUnAligned.length() * 0.33);
		
		string leftQuery = ""; //first 1/3 of the sequence
		string rightQuery = ""; //last 1/3 of the sequence
		string queryAligned = querySeq.getAligned();
		
		//left side
		bool foundFirstBase = false;
		int baseCount = 0;
		int leftSpot = 0;
		int firstBaseSpot = 0;
		for (int i = 0; i < queryAligned.length(); i++) {
			//if you are a base
			if (isalpha(queryAligned[i])) {		
				baseCount++; 
				if (!foundFirstBase) {   foundFirstBase = true;  firstBaseSpot = i;  }
			}
			
			//eliminate opening .'s
			if (foundFirstBase) {   leftQuery += queryAligned[i];  }
			//if you have 1/3
			if (baseCount >= numBases) {  leftSpot = i; break; } //first 1/3
		}
		
		//right side - count through another 1/3, so you are at last third
		baseCount = 0;
		int rightSpot = 0;
		for (int i = leftSpot; i < queryAligned.length(); i++) {
			//if you are a base
			if (isalpha(queryAligned[i])) {		baseCount++;	}
			//if you have 1/3
			if (baseCount > numBases + 1) { rightSpot = i;  break; } //last 1/3
		}
		
		//trim end
		//find last position in query that is a non gap character
		int lastBaseSpot = queryAligned.length()-1;
		for (int j = queryAligned.length()-1; j >= 0; j--) {
			if (isalpha(queryAligned[j])) {
				lastBaseSpot = j;
				break;
			}
		}
		rightQuery = queryAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //sequence from pos spot to end
		
		Sequence queryLeft(querySeq.getName(), leftQuery);
		Sequence queryRight(querySeq.getName(), rightQuery);
		
		//distance of each template sequence to the left and to the right piece, over the same columns
		for(int j = 0; j < thisFilteredTemplate.size(); j++){
			
			string dbAligned = thisFilteredTemplate[j]->getAligned();
			string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence
			string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //last 1/3 of the sequence

			Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB);
			Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB);

			distcalculator->calcDist(queryLeft, dbLeft);
			float distLeft = distcalculator->getDist();
			
			distcalculator->calcDist(queryRight, dbRight);
			float distRight = distcalculator->getDist();

			SeqDist subjectLeft;
			subjectLeft.seq = NULL;
			subjectLeft.dist = distLeft;
			subjectLeft.index = j;
			
			distsLeft.push_back(subjectLeft);
			
			SeqDist subjectRight;
			subjectRight.seq = NULL;
			subjectRight.dist = distRight;
			subjectRight.index = j;
			
			distsRight.push_back(subjectRight);

		}
		
		delete distcalculator;
		
		//no template sequences: nothing can match, and the [0] reads below would be out of range
		if (distsLeft.empty()) { return seqsMatches; }
		
		//sort by smallest distance
		sort(distsRight.begin(), distsRight.end(), compareSeqDist);
		sort(distsLeft.begin(), distsLeft.end(), compareSeqDist);

		
		//merge results		
		map<string, string> seen;
		map<string, string>::iterator it;
		
		vector<SeqDist> dists;
		float lastRight = distsRight[0].dist;
		float lastLeft = distsLeft[0].dist;

		float maxDist = 1.0 - (minSim / 100.0);

		//distsLeft and distsRight always have the same size (one entry each per template),
		//so one bound protects both when there are fewer templates than numWanted+1
		int available = (int)distsLeft.size();
		for (int i = 0; (i < numWanted+1) && (i < available); i++) {
			if (m->control_pressed) { return seqsMatches; }
			
			//add left if you havent already
			it = seen.find(thisTemplate[distsLeft[i].index]->getName());
			if (it == seen.end() && distsLeft[i].dist <= maxDist) {  
				dists.push_back(distsLeft[i]);
				seen[thisTemplate[distsLeft[i].index]->getName()] = thisTemplate[distsLeft[i].index]->getName();
				lastLeft =  distsLeft[i].dist;
			}

			//add right if you havent already
			it = seen.find(thisTemplate[distsRight[i].index]->getName());
			if (it == seen.end() && distsRight[i].dist <= maxDist) {  
				dists.push_back(distsRight[i]);
				seen[thisTemplate[distsRight[i].index]->getName()] = thisTemplate[distsRight[i].index]->getName();
				lastRight =  distsRight[i].dist;
			}
			
			if (i == numWanted) { break; }
			
		}
		
		//are we still above the minimum similarity cutoff
		//NOTE(review): lastLeft/lastRight are distances while minSim is used as a percentage
		//everywhere else in this function - confirm this comparison is intended
		if ((lastLeft >= minSim) || (lastRight >= minSim)) {
			//add in ties from left
			int i = numWanted;
			while (i < distsLeft.size()) {  
				if (distsLeft[i].dist == lastLeft) {  dists.push_back(distsLeft[i]);  }
				else { break; }
				i++;
			}
			
			//add in ties from right
			i = numWanted;
			while (i < distsRight.size()) {  
				if (distsRight[i].dist == lastRight) {  dists.push_back(distsRight[i]);  }
				else { break; }
				i++;
			}
		}
		
		//keep candidates that are not the query itself and meet the similarity cutoff
		for (int i = 0; i < dists.size(); i++) {

			if ((thisTemplate[dists[i].index]->getName() != querySeq.getName()) && (((1.0-dists[i].dist)*100) >= minSim)) {
				Sequence temp(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
				seqsMatches.push_back(temp);
			}

		}
		
		return seqsMatches;
	}
	catch(exception& e) {
		m->errorOut(e, "DeCalculator", "findClosest");
		exit(1);
	}
}
Exemple #16
0
//Aligns num records read from inMPI, beginning with record index start.
//MPIPos holds the byte offsets of the records: record k spans MPIPos[k] up to MPIPos[k+1].
//The aligned sequence goes to alignFile and the report line to reportFile; the process
//with rank 0 writes the report headers first.
//A candidate that keeps fewer bases than threshold allows is listed in accnosFile and,
//when flip is set, its reverse complement is tried as well.
//Returns 0 when control_pressed is seen, 1 otherwise.
int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& alignFile, MPI_File& reportFile, MPI_File& accnosFile, vector<unsigned long long>& MPIPos){
	try {
		string outputString = "";
		MPI_Status statusReport; 
		MPI_Status statusAlign; 
		MPI_Status statusAccnos; 
		MPI_Status status; 
		int pid;
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
	
		NastReport report;
		
		if (pid == 0) {
			outputString = report.getHeaders();
			int length = outputString.length();
            
			char* buf = new char[length];
			memcpy(buf, outputString.c_str(), length);
		
			MPI_File_write_shared(reportFile, buf, length, MPI_CHAR, &statusReport);

			delete[] buf;
		}
		
		Alignment* alignment;
		int longestBase = templateDB->getLongestBase();
		if(align == "gotoh")			{	alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);			}
		else if(align == "needleman")	{	alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);				}
		else if(align == "blast")		{	alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);		}
		else if(align == "noalign")		{	alignment = new NoAlign();													}
		else {
			m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
			m->mothurOutEndLine();
			alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
		}
		
		
		for(int i=0;i<num;i++){
		
			if (m->control_pressed) { delete alignment; return 0; }

			//read next sequence
			int length = MPIPos[start+i+1] - MPIPos[start+i];

			//one extra byte, all zeroed, so the buffer is a valid C string whatever the read delivers
			char* buf4 = new char[length+1]();

			MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
			
			//stops at the first NUL, so the text can never be longer than length
			string tempBuf = buf4;

			delete[] buf4;
	
			istringstream iss (tempBuf,istringstream::in);

			Sequence* candidateSeq = new Sequence(iss);  
			report.setCandidate(candidateSeq);

			int origNumBases = candidateSeq->getNumBases();
			string originalUnaligned = candidateSeq->getUnaligned();
			int numBasesNeeded = origNumBases * threshold;
	
			if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
				if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
					alignment->resize(candidateSeq->getUnaligned().length()+1);
				}
								
				Sequence temp = templateDB->findClosestSequence(candidateSeq);
				Sequence* templateSeq = &temp;
				
				float searchScore = templateDB->getSearchScore();
								
				Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
				Sequence* copy = NULL;
				
				//template found for the reverse complement; kept on the heap because templateSeq
				//and nast may still point at it when the report is written below
				Sequence* reverseTemplate = NULL;
				
				Nast* nast2 = NULL;
				bool needToDeleteCopy = false;  //this is needed in case you have you enter the ifs below
												//since nast does not make a copy of hte sequence passed, and it is used by the reporter below
												//you can't delete the copy sequence til after you report, but you may choose not to create it in the first place
												//so this bool tells you if you need to delete it
												
				//if there is a possibility that this sequence should be reversed
				if (candidateSeq->getNumBases() < numBasesNeeded) {
					
					string wasBetter = "";
					//if the user wants you to try the reverse
					if (flip) {
						//get reverse compliment
						copy = new Sequence(candidateSeq->getName(), originalUnaligned);
						copy->reverseComplement();
						
						//rerun alignment
						reverseTemplate = new Sequence(templateDB->findClosestSequence(copy));
						
						searchScore = templateDB->getSearchScore();
						
						nast2 = new Nast(alignment, copy, reverseTemplate);
			
						//check if any better
						if (copy->getNumBases() > candidateSeq->getNumBases()) {
							candidateSeq->setAligned(copy->getAligned());  //use reverse compliments alignment since its better
							templateSeq = reverseTemplate; 
							delete nast;
							nast = nast2;
							needToDeleteCopy = true;
							wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
						}else{  
							wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
							delete nast2;
							delete copy;	
						}
					}
					
					//create accnos file with names
					outputString = candidateSeq->getName() + wasBetter + "\n";
					
					//send results to parent
					int accnosLength = outputString.length();

					char* buf = new char[accnosLength];
					memcpy(buf, outputString.c_str(), accnosLength);
				
					MPI_File_write_shared(accnosFile, buf, accnosLength, MPI_CHAR, &statusAccnos);
					delete[] buf;
					MPIWroteAccnos = true;
				}
				
				report.setTemplate(templateSeq);
				report.setSearchParameters(search, searchScore);
				report.setAlignmentParameters(align, alignment);
				report.setNastParameters(*nast);
	
				outputString =  ">" + candidateSeq->getName() + "\n" + candidateSeq->getAligned() + "\n";
				
				//send results to parent
				int alignLength = outputString.length();
				char* buf2 = new char[alignLength];
				memcpy(buf2, outputString.c_str(), alignLength);
				
				MPI_File_write_shared(alignFile, buf2, alignLength, MPI_CHAR, &statusAlign);
				
				delete[] buf2;

				outputString = report.getReport();
				
				//send results to parent
				int reportLength = outputString.length();
				char* buf3 = new char[reportLength];
				memcpy(buf3, outputString.c_str(), reportLength);
				
				MPI_File_write_shared(reportFile, buf3, reportLength, MPI_CHAR, &statusReport);
				
				delete[] buf3;
				delete nast;
				if (needToDeleteCopy) {   delete copy;   }
				delete reverseTemplate;	//NULL when the reverse complement was never tried
			}
			delete candidateSeq;
			
			//report progress
			if((i+1) % 100 == 0){	cout << (toString(i+1)) << endl;		}
		}
		//report progress
		if((num) % 100 != 0){	cout << (toString(num)) << endl;		}
		
		//the aligner is created in this function, so it is released here on the normal path too
		delete alignment;
		
		return 1;
	}
	catch(exception& e) {
		m->errorOut(e, "AlignCommand", "driverMPI");
		exit(1);
	}
}
Exemple #17
0
//***************************************************************************************************************
vector<sim> ChimeraCheckRDP::findIS() {
	try {
		
		
		vector< map<int, int> > queryKmerInfo;	//vector of maps - each entry in the vector is a map of the kmers up to that spot in the unaligned seq
												//example:  seqKmerInfo[50] = map containing the kmers found in the first 50 + kmersize characters of ecoli.
												//i chose to store the kmers numbers in a map so you wouldn't have to check for dupilcate entries and could easily find the 
												//kmers 2 seqs had in common.  There may be a better way to do this thats why I am leaving so many comments...
		vector< map<int, int> > subjectKmerInfo;
		
		vector<sim>  isValues;
		string queryName = querySeq->getName();
		string seq = querySeq->getUnaligned();
		
		queryKmerInfo = kmer->getKmerCounts(seq);
		subjectKmerInfo = kmer->getKmerCounts(closest.getUnaligned());
		
		//find total kmers you have in common with closest[query] by looking at the last entry in the vector of maps for each
		int nTotal = calcKmers(queryKmerInfo[(queryKmerInfo.size()-1)], subjectKmerInfo[(subjectKmerInfo.size()-1)]);

		//you don't want the starting point to be virtually at hte end so move it in 10%
		int start = seq.length() / 10;
			
		//for each window
		for (int f = start; f < (seq.length() - start); f+=increment) {
		
			if (m->control_pressed) { return isValues; }
			
			if ((f - kmerSize) < 0)  { m->mothurOut("Your sequence is too short for your kmerSize."); m->mothurOutEndLine(); exit(1); }
			
			sim temp;
			
			string fragLeft = seq.substr(0, f);  //left side of breakpoint
			string fragRight = seq.substr(f);  //right side of breakpoint
			
			//make a sequence of the left side and right side
			Sequence* left = new Sequence(queryName, fragLeft);
			Sequence* right = new Sequence(queryName, fragRight);
			
			//find seqs closest to each fragment
			Sequence closestLeft = templateDB->findClosestSequence(left); 
	
			Sequence closestRight = templateDB->findClosestSequence(right); 
		
			//get kmerinfo for the closest left
			vector< map<int, int> > closeLeftKmerInfo = kmer->getKmerCounts(closestLeft.getUnaligned());
			
			//get kmerinfo for the closest right
			vector< map<int, int> > closeRightKmerInfo = kmer->getKmerCounts(closestRight.getUnaligned());
			
			//right side is tricky - since the counts grow on eachother to find the correct counts of only the right side you must subtract the counts of the left side
			//iterate through left sides map to subtract the number of times you saw things before you got the the right side
			map<int, int> rightside = queryKmerInfo[queryKmerInfo.size()-1];
			for (map<int, int>::iterator itleft = queryKmerInfo[f-kmerSize].begin(); itleft != queryKmerInfo[f-kmerSize].end(); itleft++) {
				int howManyTotal = queryKmerInfo[queryKmerInfo.size()-1][itleft->first];   //times that kmer was seen in total

				//itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
				int howmanyright = howManyTotal - itleft->second;
				
				//if any were seen just on the left erase
				if (howmanyright == 0) {
					rightside.erase(itleft->first);
				}
			}
			
			map<int, int> closerightside = closeRightKmerInfo[closeRightKmerInfo.size()-1];
			for (map<int, int>::iterator itright = closeRightKmerInfo[f-kmerSize].begin(); itright != closeRightKmerInfo[f-kmerSize].end(); itright++) {
				int howManyTotal = closeRightKmerInfo[(closeRightKmerInfo.size()-1)][itright->first];   //times that kmer was seen in total

				//itleft->second is times it was seen in left side, so howmanytotal - leftside should give you right side
				int howmanyright = howManyTotal - itright->second;
				
				//if any were seen just on the left erase
				if (howmanyright == 0) {
					closerightside.erase(itright->first);
				}
			}

			
			int nLeft = calcKmers(closeLeftKmerInfo[f-kmerSize], queryKmerInfo[f-kmerSize]);

			int nRight = calcKmers(closerightside, rightside);

			int is = nLeft + nRight - nTotal;

			//save IS, leftparent, rightparent, breakpoint
			temp.leftParent = closestLeft.getName();
			temp.rightParent = closestRight.getName();
			temp.score = is;
			temp.midpoint = f;
			
			isValues.push_back(temp);
			
			delete left;
			delete right;
		}
		
		return isValues;
	
	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraCheckRDP", "findIS");
		exit(1);
	}
}
Exemple #18
0
//**********************************************************************************************************************
/*
 * Aligns the sequences found in this worker's slice of the fasta file.
 *
 * For every named candidate read from params->inputFilename (starting at
 * params->filePos.start) the closest template is looked up, a Nast alignment is run and
 * the aligned sequence plus its report line are written through params->alignWriter and
 * params->reportWriter. When the aligned candidate keeps fewer bases than
 * threshold * original bases, its name is written through params->accnosWriter and, if
 * params->flip is set, the reverse complement is aligned as well and used when it keeps
 * more bases.
 *
 * On return params->numSeqs has been increased by the number of sequences processed,
 * params->flippedResults[1] by the number of candidates that fell below the threshold and
 * params->flippedResults[0] by the number of those whose reverse complement was used.
 */
void alignDriver(alignStruct* params) {
	try {
		NastReport report;

		ifstream inFASTA;
		params->util.openInputFile(params->inputFilename, inFASTA);

		inFASTA.seekg(params->filePos.start);

		long long count = 0;
		long long numFlipped_0 = 0;	//reverse complements that were actually used
		long long numFlipped_1 = 0;	//candidates that fell below the threshold

		//moved this into driver to avoid deep copies in windows parallelized version
		Alignment* alignment;
		int longestBase = params->templateDB->getLongestBase();
		if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: template longest base = "  + toString(longestBase) + " \n"); }
		if(params->alignMethod == "gotoh")			{	alignment = new GotohOverlap(params->gapOpen, params->gapExtend, params->match, params->misMatch, longestBase);			}
		else if(params->alignMethod == "needleman")	{	alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, longestBase);				}
		else if(params->alignMethod == "blast")		{	alignment = new BlastAlignment(params->gapOpen, params->gapExtend, params->match, params->misMatch);		}
		else if(params->alignMethod == "noalign")		{	alignment = new NoAlign();													}
		else {
			params->m->mothurOut(params->alignMethod + " is not a valid alignment option. I will run the command using needleman.");
			params->m->mothurOutEndLine();
			alignment = new NeedlemanOverlap(params->gapOpen, params->match, params->misMatch, longestBase);
		}

		//the loop is left through one of the breaks below
		while (true) {

			if (params->m->getControl_pressed()) {  break; }

			Sequence* candidateSeq = new Sequence(inFASTA);  params->util.gobble(inFASTA);
			report.setCandidate(candidateSeq);

			int origNumBases = candidateSeq->getNumBases();
			string originalUnaligned = candidateSeq->getUnaligned();
			int numBasesNeeded = origNumBases * params->threshold;

			if (candidateSeq->getName() != "") { //in case there is a commented sequence at the end of a file
				if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) {
					if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); }
					alignment->resize(candidateSeq->getUnaligned().length()+2);
				}

				float searchScore = 0;
				Sequence temp = params->templateDB->findClosestSequence(candidateSeq, searchScore);
				Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());

				Nast* nast = new Nast(alignment, candidateSeq, templateSeq);

				Sequence* copy = NULL;
				Nast* nast2 = NULL;
				bool needToDeleteCopy = false;  //nast does not copy the sequence it is given and the reporter below still uses it,
												//so the reverse complement can only be deleted after reporting - and only if it
												//was created and kept. This flag records that.

				//if there is a possibility that this sequence should be reversed
				if (candidateSeq->getNumBases() < numBasesNeeded) {
					numFlipped_1++;
					string wasBetter =  "";
					//if the user wants you to try the reverse
					if (params->flip) {

						//get reverse complement
						copy = new Sequence(candidateSeq->getName(), originalUnaligned);
						copy->reverseComplement();

						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: flipping "  + candidateSeq->getName() + " \n"); }

						//rerun alignment; the score goes into its own variable so that the score of the
						//forward search survives when the reverse complement turns out not to be better
						float flippedSearchScore = 0;
						Sequence temp2 = params->templateDB->findClosestSequence(copy, flippedSearchScore);
						Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());

						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: closest template "  + temp2.getName() + " \n"); }

						nast2 = new Nast(alignment, copy, templateSeq2);

						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: completed Nast2 "  + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); }

						//check if any better
						if (copy->getNumBases() > candidateSeq->getNumBases()) {
							candidateSeq->setAligned(copy->getAligned());  //use reverse complement's alignment since it is better
							delete templateSeq;
							templateSeq = templateSeq2;
							delete nast;
							nast = nast2;
							searchScore = flippedSearchScore;  //report the score that belongs to the template now in use
							needToDeleteCopy = true;
							wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
							numFlipped_0++;
						}else{
							wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
							delete nast2;
							delete templateSeq2;
							delete copy;
						}
						if (params->m->getDebug()) { params->m->mothurOut("[DEBUG]: done.\n"); }
					}

					//create accnos file with names
					params->accnosWriter->write(candidateSeq->getName() + wasBetter + "\n");
				}

				report.setTemplate(templateSeq);
				report.setSearchParameters(params->search, searchScore);
				report.setAlignmentParameters(params->alignMethod, alignment);
				report.setNastParameters(*nast);

				params->alignWriter->write('>' + candidateSeq->getName() + '\n' + candidateSeq->getAligned() + '\n');
				params->reportWriter->write(report.getReport());
				delete nast;
				delete templateSeq;
				if (needToDeleteCopy) {   delete copy;   }

				count++;
			}
			delete candidateSeq;

			//stop at the end of this worker's slice
			#if defined NON_WINDOWS
				unsigned long long pos = inFASTA.tellg();
				if ((pos == -1) || (pos >= params->filePos.end)) { break; }
			#else
				if (count == params->filePos.end) { break; }
			#endif

			//report progress
			if((count) % 1000 == 0){	params->m->mothurOutJustToScreen(toString(count) + "\n"); 		}

		}
		//report progress
		if((count) % 1000 != 0){	params->m->mothurOutJustToScreen(toString(count) + "\n"); 		}

		params->numSeqs += count;
		params->flippedResults[0] += numFlipped_0;
		params->flippedResults[1] += numFlipped_1;

		delete alignment;
		inFASTA.close();

	}
	catch(exception& e) {
		params->m->errorOut(e, "AlignCommand", "driver");
		exit(1);
	}
}
Exemple #19
0
//**********************************************************************************************************************
/*
 * Aligns the sequences of 'filename' that lie in the slice described by filePos.
 *
 * filePos      start (and, on unix-like builds, end) position of the slice in the fasta file
 * alignFName   output file receiving the aligned sequences in fasta format
 * reportFName  output file handed to the NastReport
 * accnosFName  output file receiving the names of sequences that fell below the threshold,
 *              followed by a note on whether the reverse complement was used
 * filename     input fasta file
 *
 * Returns the number of sequences processed.
 */
int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){
	try {
		ofstream alignmentFile;
		m->openOutputFile(alignFName, alignmentFile);

		ofstream accnosFile;
		m->openOutputFile(accnosFName, accnosFile);

		NastReport report(reportFName);

		ifstream inFASTA;
		m->openInputFile(filename, inFASTA);

		inFASTA.seekg(filePos->start);

		int count = 0;

		//moved this into driver to avoid deep copies in windows parallelized version
		Alignment* alignment;
		int longestBase = templateDB->getLongestBase();
		if (m->debug) { m->mothurOut("[DEBUG]: template longest base = "  + toString(longestBase) + " \n"); }
		if(align == "gotoh")			{	alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);			}
		else if(align == "needleman")	{	alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);				}
		else if(align == "blast")		{	alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);		}
		else if(align == "noalign")		{	alignment = new NoAlign();													}
		else {
			m->mothurOut(align + " is not a valid alignment option. I will run the command using needleman.");
			m->mothurOutEndLine();
			alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);
		}

		//the loop is left through one of the breaks below
		while (true) {

			if (m->control_pressed) {  break; }

			Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
			report.setCandidate(candidateSeq);

			int origNumBases = candidateSeq->getNumBases();
			string originalUnaligned = candidateSeq->getUnaligned();
			int numBasesNeeded = origNumBases * threshold;

			if (candidateSeq->getName() != "") { //in case there is a commented sequence at the end of a file
				if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) {
					if (m->debug) { m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); }
					alignment->resize(candidateSeq->getUnaligned().length()+2);
				}
				Sequence temp = templateDB->findClosestSequence(candidateSeq);
				Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());

				float searchScore = templateDB->getSearchScore();

				Nast* nast = new Nast(alignment, candidateSeq, templateSeq);

				Sequence* copy = NULL;
				Nast* nast2 = NULL;
				bool needToDeleteCopy = false;  //nast does not copy the sequence it is given and the reporter below still uses it,
												//so the reverse complement can only be deleted after reporting - and only if it
												//was created and kept. This flag records that.

				//if there is a possibility that this sequence should be reversed
				if (candidateSeq->getNumBases() < numBasesNeeded) {

					string wasBetter =  "";
					//if the user wants you to try the reverse
					if (flip) {

						//get reverse complement
						copy = new Sequence(candidateSeq->getName(), originalUnaligned);
						copy->reverseComplement();

						if (m->debug) { m->mothurOut("[DEBUG]: flipping "  + candidateSeq->getName() + " \n"); }

						//rerun alignment
						Sequence temp2 = templateDB->findClosestSequence(copy);
						Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());

						if (m->debug) { m->mothurOut("[DEBUG]: closest template "  + temp2.getName() + " \n"); }

						//kept apart from searchScore so that the score of the forward search survives
						//when the reverse complement turns out not to be better
						float flippedSearchScore = templateDB->getSearchScore();

						nast2 = new Nast(alignment, copy, templateSeq2);

						if (m->debug) { m->mothurOut("[DEBUG]: completed Nast2 "  + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); }

						//check if any better
						if (copy->getNumBases() > candidateSeq->getNumBases()) {
							candidateSeq->setAligned(copy->getAligned());  //use reverse complement's alignment since it is better
							delete templateSeq;
							templateSeq = templateSeq2;
							delete nast;
							nast = nast2;
							searchScore = flippedSearchScore;  //report the score that belongs to the template now in use
							needToDeleteCopy = true;
							wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
						}else{
							wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
							delete nast2;
							delete templateSeq2;
							delete copy;
						}
						if (m->debug) { m->mothurOut("[DEBUG]: done.\n"); }
					}

					//create accnos file with names
					accnosFile << candidateSeq->getName() << wasBetter << endl;
				}

				report.setTemplate(templateSeq);
				report.setSearchParameters(search, searchScore);
				report.setAlignmentParameters(align, alignment);
				report.setNastParameters(*nast);

				alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;

				report.print();
				delete nast;
				delete templateSeq;
				if (needToDeleteCopy) {   delete copy;   }

				count++;
			}
			delete candidateSeq;

			//stop at the end of the slice (unix-like builds) or at the end of the file
			#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
				unsigned long long pos = inFASTA.tellg();
				if ((pos == -1) || (pos >= filePos->end)) { break; }
			#else
				if (inFASTA.eof()) { break; }
			#endif

			//report progress
			if((count) % 100 == 0){	m->mothurOutJustToScreen(toString(count) + "\n"); 		}

		}
		//report progress
		if((count) % 100 != 0){	m->mothurOutJustToScreen(toString(count) + "\n"); 		}

		delete alignment;
		alignmentFile.close();
		inFASTA.close();
		accnosFile.close();

		return count;
	}
	catch(exception& e) {
		m->errorOut(e, "AlignCommand", "driver");
		exit(1);
	}
}
Exemple #20
0
/**
 * Builds a BasicSequence from an arbitrary Sequence: the symbol-list base is constructed
 * from s, and the name and comments are taken from s.getName() and s.getComments().
 */
BasicSequence::BasicSequence(const Sequence& s)
	: BasicSymbolList(s), name_(s.getName()), comments_(s.getComments())
{
}
Exemple #21
0
//***************************************************************************************************************
/*
 * Loads the reference sequences, either from the ReferenceDB kept in memory (file == "saved")
 * or from the fasta file named by 'file'.
 *
 * Side effects on this object: 'length' is set to the aligned length of the first sequence
 * read, 'unaligned' becomes true if any later sequence has a different aligned length,
 * 'templateFileName' is set in the "saved" case and 'filterString' (all '1's, one per
 * aligned column of the first kept sequence) in the file case. When rdb->save is set, the
 * sequences read from the file are also appended to rdb->referenceSeqs.
 *
 * Returns the sequences that have a non-empty name; the caller owns the returned pointers.
 * If control_pressed is set while reading a file, the sequences read so far are returned.
 */
vector<Sequence*> MothurChimera::readSeqs(string file) {
	try {

		vector<Sequence*> container;
		int count = 0;
		length = 0;
		unaligned = false;
		ReferenceDB* rdb = ReferenceDB::getInstance();

		if (file == "saved") {

			m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");	m->mothurOutEndLine();

			for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
				Sequence* temp = new Sequence(rdb->referenceSeqs[i].getName(), rdb->referenceSeqs[i].getAligned());

				if (count == 0) {  length = temp->getAligned().length();  count++;  } //gets first seqs length
				else if (length != temp->getAligned().length()) {	unaligned = true;	}

				if (temp->getName() != "") {  container.push_back(temp);  }
				else { delete temp; } //not handed to the caller, so it has to be released here
			}

			templateFileName = rdb->getSavedReference();

		}else {

			m->mothurOut("Reading sequences from " + file + "..."); cout.flush();

			ifstream in;
			m->openInputFile(file, in);

			//read in seqs and store in vector
			while(!in.eof()){

				if (m->control_pressed) { return container; }

				Sequence* current = new Sequence(in);  m->gobble(in);

				if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
				else if (length != current->getAligned().length()) {   unaligned = true;	}

				if (current->getName() != "") {
					container.push_back(current);
					if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
				}
				else { delete current; } //not handed to the caller, so it has to be released here
			}
			in.close();

			m->mothurOut("Done."); m->mothurOutEndLine();

			//an empty file (or one without any named sequence) leaves nothing to size the filter from
			if (container.empty()) { filterString = ""; }
			else { filterString = (string(container[0]->getAligned().length(), '1')); }
		}

		return container;
	}
	catch(exception& e) {
		m->errorOut(e, "MothurChimera", "readSeqs");
		exit(1);
	}
}
//**********************************************************************************************************************
/*
 * Chops 'seq' down to 'numbases' positions, keeping either the front or the back
 * (member 'keep'), and returns the chopped aligned string.
 *
 * countGaps == true : every character of the aligned string counts as a position.
 * countGaps == false: only alphabetic characters count, and the sequence is considered long
 *                     enough when its unaligned length exceeds numbases.
 * Unless keepN is set, every 'N'/'n' met while counting is replaced by '.'; in the
 * countGaps == false case each replacement also shortens the usable length and the result
 * becomes "" once fewer than numbases usable bases remain.
 * A sequence that is not longer than numbases is returned unchanged when 'Short' is set
 * and as "" otherwise.
 *
 * qualValues is only written when countGaps is false. It receives one line of the form
 * name<TAB>number<TAB>number<NEWLINE> describing the kept region ("0 0" when nothing is kept
 * because the sequence is too short).
 */
string ChopSeqsCommand::getChopped(Sequence seq, string& qualValues) {
	try {
		string temp = seq.getAligned();
		string tempUnaligned = seq.getUnaligned();

		if (countGaps) {
			//if needed trim sequence
			if (keep == "front") {//you want to keep the beginning
				int tempLength = temp.length();

				if (tempLength > numbases) { //you have enough bases to remove some

					int stopSpot = 0;
					int numBasesCounted = 0;

					for (int i = 0; i < temp.length(); i++) {
						//eliminate N's
						if (!keepN) { if (toupper(temp[i]) == 'N') { temp[i] = '.'; } }

						numBasesCounted++;

						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}

					if (stopSpot == 0) { temp = ""; }
					else {  temp = temp.substr(0, stopSpot+1);  }

				}else {
					if (!Short) { temp = ""; } //sequence too short
				}
			}else { //you are keeping the back
				int tempLength = temp.length();
				if (tempLength > numbases) { //you have enough bases to remove some

					int stopSpot = 0;
					int numBasesCounted = 0;

					for (int i = (temp.length()-1); i >= 0; i--) {
						//eliminate N's
						if (!keepN) { if (toupper(temp[i]) == 'N') { temp[i] = '.'; } }

						numBasesCounted++;

						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}

					//stopSpot is the last position that was counted, so it belongs to the kept part;
					//starting at stopSpot+1 would return one position less than requested
					if (stopSpot == 0) { temp = ""; }
					else {  temp = temp.substr(stopSpot);  }
				}else {
					if (!Short) { temp = ""; } //sequence too short
				}
			}

		}else{

			//if needed trim sequence
			if (keep == "front") {//you want to keep the beginning
				int tempLength = tempUnaligned.length();

				if (tempLength > numbases) { //you have enough bases to remove some

					int stopSpot = 0;
					int numBasesCounted = 0;

					for (int i = 0; i < temp.length(); i++) {
						//eliminate N's
						if (!keepN) {
							if (toupper(temp[i]) == 'N') {
								temp[i] = '.';
								tempLength--;
								if (tempLength < numbases) { stopSpot = 0; break; }
							}
						}
						if(isalpha(temp[i])) { numBasesCounted++; }

						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}

					if (stopSpot == 0) { temp = ""; }
					else {  temp = temp.substr(0, stopSpot+1);  }

					qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(stopSpot+1) + '\n';

				}else {
					if (!Short) { temp = ""; qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(0) + '\n'; } //sequence too short
					else { qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(tempLength) + '\n'; }
				}
			}else { //you are keeping the back
				int tempLength = tempUnaligned.length();
				if (tempLength > numbases) { //you have enough bases to remove some

					int stopSpot = 0;
					int numBasesCounted = 0;

					for (int i = (temp.length()-1); i >= 0; i--) {
						if (!keepN) {
							//eliminate N's
							if (toupper(temp[i]) == 'N') {
								temp[i] = '.';
								tempLength--;
								if (tempLength < numbases) { stopSpot = 0; break; }
							}
						}
						if(isalpha(temp[i])) { numBasesCounted++; }

						if (numBasesCounted >= numbases) { stopSpot = i; break; }
					}

					if (stopSpot == 0) { temp = ""; }
					else {  temp = temp.substr(stopSpot);  }

					//temp.length()-1 is unsigned and would wrap around to a huge number when nothing
					//was kept; report 0 in that case, as the "too short" branch below does
					int lastKept = 0;
					if (!temp.empty()) { lastKept = static_cast<int>(temp.length()) - 1; }

					qualValues = seq.getName() +'\t' + toString(stopSpot) + '\t' + toString(lastKept) + '\n';

				}else {
					if (!Short) { temp = ""; qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(0) + '\n'; } //sequence too short
					else { qualValues = seq.getName() +'\t' + toString(0) + '\t' + toString(tempLength) + '\n'; }
				}
			}
		}

		return temp;
	}
	catch(exception& e) {
		m->errorOut(e, "ChopSeqsCommand", "getChopped");
		exit(1);
	}
}