Example #1
0
  void EnzymaticDigestion::digest(const AASequence& protein, vector<AASequence>& output) const
  {
    // initialization
    output.clear();
    // naive cleavage sites
    Size missed_cleavages = missed_cleavages_;
    std::vector<Size> pep_positions = tokenize_(protein.toUnmodifiedString());
    Size count = pep_positions.size();
    Size begin = pep_positions[0];
    for (Size i = 1; i < count; ++i)
    {
      output.push_back(protein.getSubsequence(begin, pep_positions[i] - begin));
      begin = pep_positions[i];
    }
    output.push_back(protein.getSubsequence(begin, protein.size() - begin));

    // missed cleavages
    if (pep_positions.size() > 0 && missed_cleavages_ != 0) // there is at least one cleavage site!
    {
      // generate fragments with missed cleavages
      for (Size i = 1; ((i <= missed_cleavages) && (count > i)); ++i)
      {
        begin = pep_positions[0];
        for (Size j = 1; j < count - i; ++j)
        {
          output.push_back(protein.getSubsequence(begin, pep_positions[j + i] - begin));
          begin = pep_positions[j];
        }
        output.push_back(protein.getSubsequence(begin, protein.size() - begin));
      }
    }
  }
Example #2
0
 // Computes the number of digestion products of 'protein' from the number of fragments
 // reported by tokenize_ and the configured missed_cleavages_.
 Size EnzymaticDigestion::peptideCount(const AASequence& protein)
 {
   const std::vector<Size> fragment_starts = tokenize_(protein.toUnmodifiedString());
   const Size n_fragments = fragment_starts.size();

   // products without any missed cleavage
   Size total = n_fragments;

   // merging across 'skipped' cleavage sites leaves n_fragments - skipped products
   for (Size skipped = 1; (skipped < n_fragments) && !(skipped > missed_cleavages_); ++skipped)
   {
     total += n_fragments - skipped;
   }
   return total;
 }
Example #3
0
  // Digests 'protein' and stores every product whose length lies within
  // [min_length, max_length] in 'output' (previous content is discarded).
  // A max_length of 0, or one exceeding the protein length, is replaced by the protein length.
  // Returns the number of products that were rejected by the length filter.
  Size ProteaseDigestion::digest(const AASequence& protein, vector<AASequence>& output, Size min_length, Size max_length) const
  {
    output.clear();

    // effective upper bound of the length filter
    const Size longest = (max_length == 0 || max_length > protein.size()) ? protein.size() : max_length;

    // unspecific cleavage may skip any number of cleavage sites
    Size allowed_missed = missed_cleavages_;
    if (enzyme_->getName() == UnspecificCleavage)
    {
      allowed_missed = std::numeric_limits<Size>::max();
    }

    // fragment boundaries: 0, x1, ... xn, end
    std::vector<int> boundaries = tokenize_(protein.toUnmodifiedString());
    boundaries.push_back(protein.size());
    const Size n_boundaries = boundaries.size();

    Size rejected(0);

    // skipped == 0 produces the fully cleaved fragments; every further round produces
    // the products spanning 'skipped' missed cleavage sites
    for (Size skipped = 0; (skipped <= allowed_missed) && (skipped + 1 < n_boundaries); ++skipped)
    {
      for (Size last = skipped + 1; last < n_boundaries; ++last)
      {
        const Size first = boundaries[last - skipped - 1];
        const Size length = boundaries[last] - first;
        if (length < min_length || length > longest)
        {
          ++rejected;
          continue;
        }
        output.push_back(protein.getSubsequence(first, length));
      }
    }
    return rejected;
  }
Example #4
0
  // Computes the number of digestion products of 'protein' for the configured enzyme
  // and missed_cleavages_.
  Size ProteaseDigestion::peptideCount(const AASequence& protein)
  {
    // Unspecific cleavage: every cutting position may be used or skipped,
    // which gives (n + 1) \choose 2 products for a protein of length n.
    const bool unspecific = (enzyme_->getName() == UnspecificCleavage);
    if (unspecific)
    {
      return (protein.size() + 1) * protein.size() / 2;
    }

    const std::vector<int> fragment_starts = tokenize_(protein.toUnmodifiedString());
    const Size n_fragments = fragment_starts.size();

    // products without any missed cleavage
    Size total = n_fragments;

    // merging across 'skipped' cleavage sites leaves n_fragments - skipped products
    for (Size skipped = 1; (skipped < n_fragments) && !(skipped > missed_cleavages_); ++skipped)
    {
      total += n_fragments - skipped;
    }
    return total;
  }
Example #5
0
  void EnzymaticDigestion::digestUnmodifiedString(const StringView sequence, std::vector<StringView>& output, Size min_length, Size max_length) const
  {
    // initialization
    output.clear();

    // naive cleavage sites
    std::vector<Size> pep_positions = tokenize_(sequence.getString());
    Size count = pep_positions.size();

    // disable max length filter by setting to maximum length
    if (max_length == 0)
    {
      max_length = sequence.size();
    }

    // no cleavage sites? return full string
    if (count == 0) 
    {
      if (sequence.size() >= min_length && sequence.size() <= max_length)
      {
        output.push_back(sequence);
      }
      return;
    }

    for (Size i = 1; i != count; ++i)
    {
      // add if cleavage product larger then min length
      Size l = pep_positions[i] - pep_positions[i - 1];
      if (l >= min_length && l <= max_length)
      {
        output.push_back(sequence.substr(pep_positions[i - 1], pep_positions[i] - 1));
      }
    }

    // add last cleavage product (need to add because end is not a cleavage site) if larger then min length
    Size l = sequence.size() - pep_positions[count - 1];
    if (l >= min_length && l <= max_length)
    {
      output.push_back(sequence.substr(pep_positions[count - 1], sequence.size() - 1));
    }

    // generate fragments with missed cleavages
    for (Size i = 1; ((i <= missed_cleavages_) && (i < count)); ++i)
    {
      for (Size j = 1; j < count - i; ++j)
      {
        Size l = pep_positions[j + i] - pep_positions[j - 1];
        if (l >= min_length && l <= max_length)
        {
          output.push_back(sequence.substr(pep_positions[j - 1], pep_positions[j + i] - 1));
        }
      }

      // add last cleavage product (need to add because end is not a cleavage site)
      Size l = sequence.size() - pep_positions[count - i - 1];
      if (l >= min_length && l <= max_length)
      {
        output.push_back(sequence.substr(pep_positions[count - i - 1], sequence.size() - 1 ));
      }
    }
  }
Example #6
0
  bool EnzymaticDigestion::isValidProduct(const AASequence& protein,
                                          Size pep_pos, Size pep_length,
                                          bool methionine_cleavage,
                                          bool ignore_missed_cleavages) const
  {
    if (pep_pos >= protein.size())
    {
      LOG_WARN << "Error: start of peptide (" << pep_pos << ") is beyond end of protein '" << protein.toString() << "'!" << endl;
      return false;
    }
    else if (pep_pos + pep_length > protein.size())
    {
      LOG_WARN << "Error: end of peptide (" << (pep_pos + pep_length) << ") is beyond end of protein '" << protein.toString() << "'!" << endl;
      return false;
    }
    else if (pep_length == 0 || protein.size() == 0)
    {
      LOG_WARN << "Error: peptide or protein must not be empty!" << endl;
      return false;
    }

    if (specificity_ == SPEC_NONE)
    {
      return true; // we don't care about terminal ends
    }
    else // either SPEC_SEMI or SPEC_FULL
    {
      bool spec_c = false, spec_n = false;

      std::vector<Size> pep_positions = tokenize_(protein.toUnmodifiedString());
      // initialize start and end
      std::vector<Size>::const_iterator begin_pos, end_pos;
      begin_pos = end_pos = pep_positions.end();
      // test each end
      if (pep_pos == 0 ||
          (begin_pos = std::find(pep_positions.begin(), pep_positions.end(), pep_pos)) != pep_positions.end())
      {
        spec_n = true;
      }
      // if allow methionine cleavage at the protein start position
      if (pep_pos == 1 && methionine_cleavage && protein.getResidue((Size)0).getOneLetterCode() == "M")
      {
        // methionine_cleavage:consider the first product for begin_pos
        begin_pos = pep_positions.begin();
        spec_n = true;
      }
      if (pep_pos + pep_length == protein.size() ||
          (end_pos = std::find(pep_positions.begin(), pep_positions.end(), pep_pos  + pep_length)) != pep_positions.end())
      {
        spec_c = true;
      }

      if (spec_n && spec_c)
      {
        if (ignore_missed_cleavages)
        {
          return true;
        }
        Size offset = std::distance(begin_pos, end_pos);
        if (pep_pos + pep_length == protein.size())
        {
          return (pep_positions.size() <= getMissedCleavages() + 1);
        }
        else if (offset > getMissedCleavages() + 1)
        {
          return false;
        }
        else if (offset == 0)
        {
          // This corner case needs to be checked when peptide is at the start and the end of the protein.
          // We check with the total number of cleavages.
          return (pep_positions.size() >= getMissedCleavages() + 1);
        }
        else
        {
          return true;
        }
      }
      else if ((specificity_ == SPEC_SEMI) && (spec_n || spec_c))
      {
        return true; // one only for SEMI
      }
      else
      {
        return false;
      }
    }
  }