/**
 * Look for the first occurrence of a motif inside a sequence.
 *
 * @param seq    The sequence to scan.
 * @param motif  The motif to look for.
 * @param strict If true, positions are compared with ==; otherwise the
 *               comparison is delegated to AlphabetTools::match using the
 *               alphabet of seq.
 * @return The index in seq where the first occurrence starts, or seq.size()
 *         if there is none. An empty motif is never reported as found.
 */
size_t SequenceTools::findFirstOf(const Sequence& seq, const Sequence& motif, bool strict)
{
  const size_t seqLen = seq.size();
  const size_t motifLen = motif.size();

  // Nothing can match when the motif is empty or longer than the sequence.
  if (motifLen == 0 || motifLen > seqLen)
    return seqLen;

  // Last offset at which the whole motif still fits in the sequence.
  const size_t lastOffset = seqLen - motifLen;
  for (size_t offset = 0; offset <= lastOffset; ++offset)
  {
    size_t matched = 0;
    while (matched < motifLen)
    {
      const bool same = strict
        ? (seq.getValue(offset + matched) == motif.getValue(matched))
        : AlphabetTools::match(seq.getAlphabet(), seq.getValue(offset + matched), motif.getValue(matched));
      if (!same)
        break;
      ++matched;
    }
    // Every motif position agreed: this is the first hit.
    if (matched == motifLen)
      return offset;
  }
  return seqLen;
}
/**
 * Compute the percentage of positions at which two sequences carry the same
 * state.
 *
 * @param seq1       First sequence.
 * @param seq2       Second sequence.
 * @param ignoreGaps If true, positions where either sequence holds the gap
 *                   code of seq1's alphabet are left out of both the
 *                   numerator and the denominator.
 * @return 100 * (identical positions) / (compared positions).
 *         When no position is compared (empty sequences, or only gapped
 *         positions with ignoreGaps set) the division is 0/0 and the result
 *         is not a meaningful percentage.
 * @throw AlphabetMismatchException   If the alphabet types differ.
 * @throw SequenceNotAlignedException If the sequences differ in length.
 */
double SequenceTools::getPercentIdentity(const Sequence& seq1, const Sequence& seq2, bool ignoreGaps) throw (AlphabetMismatchException, SequenceNotAlignedException)
{
  if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
    throw AlphabetMismatchException("SequenceTools::getPercentIdentity", seq1.getAlphabet(), seq2.getAlphabet());

  const size_t length = seq1.size();
  if (length != seq2.size())
    throw SequenceNotAlignedException("SequenceTools::getPercentIdentity", &seq2);

  const int gapCode = seq1.getAlphabet()->getGapCharacterCode();
  size_t identical = 0;
  size_t compared = 0;
  for (size_t pos = 0; pos < length; ++pos)
  {
    const int state1 = seq1.getValue(pos);
    const int state2 = seq2.getValue(pos);

    // Skip any column containing a gap when gaps are to be ignored.
    if (ignoreGaps && (state1 == gapCode || state2 == gapCode))
      continue;

    ++compared;
    if (state1 == state2)
      ++identical;
  }
  return static_cast<double>(identical) / static_cast<double>(compared) * 100.;
}
/**
 * Detect open reading frames on the given nucleic sequence and add one
 * feature per ORF to a feature set.
 *
 * Every position that begins a complete codon is classified as a start or a
 * stop codon according to gCode, separately for each of the three phases
 * (position modulo 3). Within a phase, every start codon is paired with the
 * first stop codon located after it; several starts may therefore share the
 * same stop. Start codons with no following stop in their phase produce no
 * feature. Each ORF is added as a "CDS" feature on the '+' strand, spanning
 * from the first position of the start codon to the last position of the
 * stop codon.
 *
 * @param seq     The sequence to scan; its alphabet must be nucleic.
 * @param featSet The feature set receiving the detected ORFs.
 * @param gCode   The genetic code defining start and stop codons.
 * @return The number of features added to featSet.
 * @throw AlphabetException If the alphabet of seq is not nucleic.
 */
unsigned int SequenceFeatureTools::getOrfs(const Sequence& seq, SequenceFeatureSet& featSet, const GeneticCode& gCode)
{
  if (! AlphabetTools::isNucleicAlphabet(seq.getAlphabet())) {
    throw AlphabetException("SequenceFeatureTools::getOrfs: Sequence alphabet must be nucleic!", seq.getAlphabet());
  }
  unsigned int orfCpt = 0;
  const CodonAlphabet* codonAlpha = gCode.getSourceAlphabet();
  std::vector< std::vector<size_t> > starts(3), stops(3);
  const size_t seqSize = seq.size();
  // 'p + 2 < seqSize' rather than 'p < seqSize - 2': the subtraction is done
  // in size_t and would wrap around for sequences shorter than two elements,
  // sending the loop far past the end of the sequence.
  for (size_t p = 0 ; p + 2 < seqSize ; p++) {
    const size_t phase = p % 3;
    if (gCode.isStart(codonAlpha->getCodon(seq.getValue(p), seq.getValue(p + 1), seq.getValue(p + 2)))) {
      starts[phase].push_back(p);
    } else if (gCode.isStop(codonAlpha->getCodon(seq.getValue(p), seq.getValue(p + 1), seq.getValue(p + 2)))) {
      stops[phase].push_back(p);
    }
  }
  // Both position lists are in increasing order, so a single merge-like pass
  // per phase is enough to pair each start with its next stop.
  for (size_t i = 0 ; i < 3 ; ++i) {
    std::vector< size_t >::iterator start(starts[i].begin()), stop(stops[i].begin());
    while (stop != stops[i].end() && start != starts[i].end()) {
      if (*stop < *start) {
        // This stop lies before the current start: it cannot close it.
        ++stop;
      } else {
        orfCpt++;
        // '*stop + 2' is the last position of the stop codon.
        bpp::BasicSequenceFeature feat("", seq.getName(), "Bio++", "CDS", *start, *stop + 2, '+');
        featSet.addFeature(feat);
        // Keep the same stop: a later start may end at it as well.
        ++start;
      }
    }
  }
  return orfCpt;
}
void VectorSiteContainer::setSequence(size_t pos, const Sequence& sequence, bool checkNames)
throw (Exception)
{
  if (pos >= getNumberOfSequences())
    throw IndexOutOfBoundsException("VectorSiteContainer::setSequence", pos, 0, getNumberOfSequences() - 1);

  // New sequence's alphabet and site container's alphabet matching verification
  if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
    throw AlphabetMismatchException("VectorSiteContainer::addSite", getAlphabet(), sequence.getAlphabet());

  // If the container has only one sequence, we set the size to the size of this sequence:
  if (getNumberOfSequences() == 1)
    realloc(sequence.size());

  if (sequence.size() != sites_.size())
    throw SequenceException("VectorSiteContainer::setSequence. Sequence has not the appropriate length.", &sequence);

  if (checkNames)
  {
    for (size_t i = 0; i < names_.size(); i++)
    {
      if (i != pos && sequence.getName() == names_[i])
        throw SequenceException("VectorSiteContainer::settSequence. Name already exists in container.", &sequence);
    }
  }
  // Update name:
  names_[pos] = sequence.getName();
  // Update elements at each site:
  for (size_t i = 0; i < sites_.size(); i++)
  {
    sites_[i]->setElement(pos, sequence.getValue(i));
  }
  // Update comments:
  if (comments_[pos])
    delete comments_[pos];
  comments_[pos] = new Comments(sequence.getComments());
  // Update sequences:
  if (sequences_[pos])
    delete sequences_[pos];
  sequences_[pos] = 0;
}
/**
 * Append a sequence at the end of the container.
 *
 * When the container holds no sequence yet, it is first resized (through
 * realloc()) to the length of the new sequence.
 *
 * @param sequence   The sequence to add. Its alphabet type must be the one
 *                   of the container and its length must equal the number
 *                   of sites.
 * @param checkNames If true, the name of the new sequence must not already
 *                   be used in the container.
 * @throw Exception  AlphabetMismatchException for a wrong alphabet type,
 *                   SequenceException for a wrong length or a duplicated
 *                   name.
 */
void VectorSiteContainer::addSequence(const Sequence& sequence, bool checkNames) throw (Exception)
{
  // New sequence's alphabet and site container's alphabet matching verification.
  // Done before any resizing so that a rejected sequence leaves the
  // container untouched.
  if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
    throw AlphabetMismatchException("VectorSiteContainer::addSequence", getAlphabet(), sequence.getAlphabet());

  // If the container has no sequence, we set the size to the size of this sequence:
  if (getNumberOfSequences() == 0)
    realloc(sequence.size());

  if (sequence.size() != sites_.size())
    throw SequenceException("VectorSiteContainer::addSequence. Sequence has not the appropriate length: " + TextTools::toString(sequence.size()) + ", should be " + TextTools::toString(sites_.size()) + ".", &sequence);

  if (checkNames)
  {
    for (size_t i = 0; i < names_.size(); i++)
    {
      if (sequence.getName() == names_[i])
        throw SequenceException("VectorSiteContainer::addSequence. Name already exists in container.", &sequence);
    }
  }

  // Append name:
  names_.push_back(sequence.getName());

  // Append elements at each site:
  for (size_t i = 0; i < sites_.size(); i++)
  {
    sites_[i]->addElement(sequence.getValue(i));
  }

  // Append comments:
  comments_.push_back(new Comments(sequence.getComments()));

  // Sequences pointers: a null entry is stored for the new sequence.
  sequences_.push_back(0);
}
/**
 * Insert a sequence at a given index of the container.
 *
 * The new sequence takes index pos; names, comments and site elements are
 * inserted at that index. Since pos must designate an existing sequence,
 * this overload can neither append at the end nor insert into an empty
 * container.
 *
 * @param sequence   The sequence to insert. Its alphabet type must be the
 *                   one of the container and its length must equal the
 *                   number of sites.
 * @param pos        Insertion index; must be lower than the current number
 *                   of sequences.
 * @param checkNames If true, the name of the new sequence must not already
 *                   be used in the container.
 * @throw Exception  IndexOutOfBoundsException for an invalid index,
 *                   SequenceNotAlignedException for a wrong length,
 *                   AlphabetMismatchException for a wrong alphabet type,
 *                   SequenceException for a duplicated name.
 */
void VectorSiteContainer::addSequence(
  const Sequence& sequence,
  size_t pos,
  bool checkNames)
throw (Exception)
{
  if (pos >= getNumberOfSequences())
    throw IndexOutOfBoundsException("VectorSiteContainer::addSequence.", pos, 0, getNumberOfSequences() - 1);
  if (sequence.size() != sites_.size())
    throw SequenceNotAlignedException("VectorSiteContainer::addSequence", &sequence);

  // New sequence's alphabet and site container's alphabet matching verification
  if (sequence.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
  {
    throw AlphabetMismatchException("VectorSiteContainer::addSequence", getAlphabet(), sequence.getAlphabet());
  }

  if (checkNames)
  {
    for (size_t i = 0; i < names_.size(); i++)
    {
      if (sequence.getName() == names_[i])
        throw SequenceException("VectorSiteContainer::addSequence. Name already exists in container.", &sequence);
    }
  }

  // All checks passed: insert the new state at index pos of every site.
  for (size_t i = 0; i < sites_.size(); i++)
  {
    sites_[i]->addElement(pos, sequence.getValue(i));
  }
  // Update names and comments, and store a null sequence pointer for the
  // new entry, all at the same index:
  names_.insert(names_.begin() + pos, sequence.getName());
  comments_.insert(comments_.begin() + pos, new Comments(sequence.getComments()));
  sequences_.insert(sequences_.begin() + pos, 0);
}