size_t SequenceTools::findFirstOf(const Sequence& seq, const Sequence& motif, bool strict) { if (motif.size() > seq.size()) return seq.size(); for (size_t seqi = 0; seqi < seq.size() - motif.size() + 1; seqi++) { bool match = false; for (size_t moti = 0; moti < motif.size(); moti++) { if (strict) { match = seq.getValue(seqi + moti) == motif.getValue(moti); } else { match = AlphabetTools::match(seq.getAlphabet(), seq.getValue(seqi + moti), motif.getValue(moti)); } if (!match) { break; } } if (match) { return seqi; } } return seq.size(); }
double SequenceTools::getPercentIdentity(const Sequence& seq1, const Sequence& seq2, bool ignoreGaps) throw (AlphabetMismatchException, SequenceNotAlignedException) { if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("SequenceTools::getPercentIdentity", seq1.getAlphabet(), seq2.getAlphabet()); if (seq1.size() != seq2.size()) throw SequenceNotAlignedException("SequenceTools::getPercentIdentity", &seq2); int gap = seq1.getAlphabet()->getGapCharacterCode(); size_t id = 0; size_t tot = 0; for (size_t i = 0; i < seq1.size(); i++) { int x = seq1.getValue(i); int y = seq2.getValue(i); if (ignoreGaps) { if (x != gap && y != gap) { tot++; if (x == y) id++; } } else { tot++; if (x == y) id++; } } return static_cast<double>(id) / static_cast<double>(tot) * 100.; }
unsigned int SequenceFeatureTools::getOrfs(const Sequence& seq, SequenceFeatureSet& featSet, const GeneticCode& gCode) { if (! AlphabetTools::isNucleicAlphabet(seq.getAlphabet())) { throw AlphabetException("SequenceFeatureTools::getOrfs: Sequence alphabet must be nucleic!", seq.getAlphabet()); } unsigned int orfCpt = 0; const CodonAlphabet* codonAlpha = gCode.getSourceAlphabet(); std::vector< std::vector<size_t> > starts(3), stops(3); size_t phase = 0; for (size_t p = 0 ; p < seq.size() - 2 ; p++) { phase = p % 3; if (gCode.isStart(codonAlpha->getCodon(seq.getValue(p), seq.getValue(p + 1), seq.getValue(p + 2)))) { starts[phase].push_back(p); //std::cerr << "Start: " << p << " (" << phase << ")" << std::endl; } else if (gCode.isStop(codonAlpha->getCodon(seq.getValue(p), seq.getValue(p + 1), seq.getValue(p + 2)))) { stops[phase].push_back(p); //std::cerr << "Stop: " << p << " (" << phase << ")" << std::endl; } } for (size_t i = 0 ; i < 3 ; ++i) { std::vector< size_t >::iterator start(starts[i].begin()), stop(stops[i].begin()); while (stop != stops[i].end() && start != starts[i].end()) { if (*stop < *start) { stop++; } else { orfCpt++; //std::cerr << "ORF: " << *start << " - " << *stop + 2 << " (" << i << ")" << std::endl; bpp::BasicSequenceFeature feat("", seq.getName(), "Bio++", "CDS", *start, *stop + 2, '+'); featSet.addFeature(feat); start++; } } } return orfCpt; }
void VectorSiteContainer::setSequence(size_t pos, const Sequence& sequence, bool checkNames) throw (Exception) { if (pos >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::setSequence", pos, 0, getNumberOfSequences() - 1); // New sequence's alphabet and site container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), sequence.getAlphabet()); // If the container has only one sequence, we set the size to the size of this sequence: if (getNumberOfSequences() == 1) realloc(sequence.size()); if (sequence.size() != sites_.size()) throw SequenceException("VectorSiteContainer::setSequence. Sequence has not the appropriate length.", &sequence); if (checkNames) { for (size_t i = 0; i < names_.size(); i++) { if (i != pos && sequence.getName() == names_[i]) throw SequenceException("VectorSiteContainer::settSequence. Name already exists in container.", &sequence); } } // Update name: names_[pos] = sequence.getName(); // Update elements at each site: for (size_t i = 0; i < sites_.size(); i++) { sites_[i]->setElement(pos, sequence.getValue(i)); } // Update comments: if (comments_[pos]) delete comments_[pos]; comments_[pos] = new Comments(sequence.getComments()); // Update sequences: if (sequences_[pos]) delete sequences_[pos]; sequences_[pos] = 0; }
void VectorSiteContainer::addSequence(const Sequence& sequence, bool checkNames) throw (Exception) { // If the container has no sequence, we set the size to the size of this sequence: if (getNumberOfSequences() == 0) realloc(sequence.size()); // New sequence's alphabet and site container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) throw AlphabetMismatchException("VectorSiteContainer::addSequence", getAlphabet(), sequence.getAlphabet()); if (sequence.size() != sites_.size()) throw SequenceException("VectorSiteContainer::addSequence. Sequence has not the appropriate length: " + TextTools::toString(sequence.size()) + ", should be " + TextTools::toString(sites_.size()) + ".", &sequence); if (checkNames) { for (size_t i = 0; i < names_.size(); i++) { if (sequence.getName() == names_[i]) throw SequenceException("VectorSiteContainer::addSequence. Name already exists in container.", &sequence); } } // Append name: names_.push_back(sequence.getName()); // Append elements at each site: for (size_t i = 0; i < sites_.size(); i++) { sites_[i]->addElement(sequence.getValue(i)); } // Append comments: comments_.push_back(new Comments(sequence.getComments())); // Sequences pointers: sequences_.push_back(0); }
void VectorSiteContainer::addSequence( const Sequence& sequence, size_t pos, bool checkNames) throw (Exception) { if (pos >= getNumberOfSequences()) throw IndexOutOfBoundsException("VectorSiteContainer::addSequence.", pos, 0, getNumberOfSequences() - 1); if (sequence.size() != sites_.size()) throw SequenceNotAlignedException("VectorSiteContainer::setSequence", &sequence); // New sequence's alphabet and site container's alphabet matching verification if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType()) { throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), sequence.getAlphabet()); } if (checkNames) { for (size_t i = 0; i < names_.size(); i++) { if (sequence.getName() == names_[i]) throw SequenceException("VectorSiteContainer::addSequence. Name already exists in container.", &sequence); } } for (size_t i = 0; i < sites_.size(); i++) { // For each site: sites_[i]->addElement(pos, sequence.getValue(i)); } // Actualize names and comments: names_.insert(names_.begin() + pos, sequence.getName()); comments_.insert(comments_.begin() + pos, new Comments(sequence.getComments())); sequences_.insert(sequences_.begin() + pos, 0); }