Пример #1
0
  double Residue::getMonoWeight(ResidueType res_type) const
  {
    switch (res_type)
    {
    case Full:
      return mono_weight_;

    case Internal:
      return mono_weight_ - getInternalToFullMonoWeight();

    case NTerminal:
      return mono_weight_ - getNTerminalToFullMonoWeight();

    case CTerminal:
      return mono_weight_ - getCTerminalToFullMonoWeight();

    case BIon:
      return mono_weight_ - getBIonToFullMonoWeight();

    case AIon:
      return mono_weight_ - getAIonToFullMonoWeight();

    case CIonMinusOne:
      return mono_weight_ - getCIonMinusOneToFullMonoWeight();

    case CIon:
      return mono_weight_ - EmpiricalFormula("OH").getMonoWeight() + EmpiricalFormula("NH").getMonoWeight();

    case CIonPlusOne:
      return mono_weight_ - getCIonPlusOneToFullMonoWeight();

    case CIonPlusTwo:
      return mono_weight_ - getCIonPlusTwoToFullMonoWeight();

    case XIon:
      return mono_weight_ + getXIonToFullMonoWeight();

    case YIon:
      return mono_weight_ + getYIonToFullMonoWeight();

    case ZIonMinusOne:
      return mono_weight_ - getZIonMinusOneToFullMonoWeight();

    case ZIon:
      return mono_weight_ - getZIonToFullMonoWeight();

    case ZIonPlusOne:
      return mono_weight_ - getZIonPlusOneToFullMonoWeight();

    case ZIonPlusTwo:
      return mono_weight_ - getZIonPlusTwoToFullMonoWeight();

    default:
      cerr << "Residue::getMonoWeight: unknown ResidueType" << endl;
      return mono_weight_;
    }
  }
Пример #2
0
  EmpiricalFormula Residue::getFormula(ResidueType res_type) const
  {
    switch (res_type)
    {
    case Full:
      return formula_;

    case Internal:
      return internal_formula_;

    case NTerminal:
      return formula_ - getNTerminalToFull();

    case CTerminal:
      return formula_ - getCTerminalToFull();

    case BIon:
      return formula_ - getBIonToFull();

    case AIon:
      return formula_ - getAIonToFull();

    case CIonMinusOne:
      return formula_ - getCIonMinusOneToFull();

    case CIon:
      return formula_ - EmpiricalFormula("OH") + EmpiricalFormula("NH");

    case XIon:
      return formula_ + getXIonToFull();

    case YIon:
      return formula_ + getYIonToFull();

    case ZIonMinusOne:
      return formula_ - getZIonMinusOneToFull();

    case ZIon:
      return formula_ - getZIonToFull();

    case ZIonPlusOne:
      return formula_ - getZIonPlusOneToFull();

    case ZIonPlusTwo:
      return formula_ - getZIonPlusTwoToFull();

    default:
      cerr << "Residue::getFormula: unknown ResidueType" << endl;
      return formula_;
    }
  }
Пример #3
0
  void Residue::setModification(const String & modification)
  {
    //modification_ = modification;

    ModificationsDB * mod_db = ModificationsDB::getInstance();
    ResidueModification mod = mod_db->getModification(one_letter_code_, modification, ResidueModification::ANYWHERE);

    modification_ = mod.getId();
    // update all the members
    if (mod.getAverageMass() != 0)
    {
      average_weight_ = mod.getAverageMass();
    }
    if (mod.getMonoMass() != 0)
    {
      mono_weight_ = mod.getMonoMass();
    }

    bool updated_formula(false);
    if (!mod.getDiffFormula().isEmpty())
    {
      updated_formula = true;
      setFormula(getFormula() + mod.getDiffFormula());
    }
    if (mod.getFormula() != "" && !updated_formula)
    {
      updated_formula = true;
      String formula = mod.getFormula();
      formula.removeWhitespaces();
      formula_ = EmpiricalFormula(formula);
    }

    if (updated_formula)
    {
      average_weight_ = formula_.getAverageWeight();
      mono_weight_ = formula_.getMonoWeight();
    }
    else
    {
      if (mod.getAverageMass() != 0)
      {
        average_weight_ = mod.getAverageMass();
      }
      if (mod.getMonoMass() != 0)
      {
        mono_weight_ = mod.getMonoMass();
      }
    }

    // neutral losses
    loss_formulas_.clear();
    loss_names_.clear();
    if (mod.hasNeutralLoss())
    {
      loss_formulas_.push_back(mod.getNeutralLossDiffFormula());
      loss_names_.push_back(mod.getNeutralLossDiffFormula().toString());
    }

    is_modified_ = true;
  }
  void CompNovoIdentificationBase::getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, DoubleReal prefix, DoubleReal suffix)
  {
    static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
    Peak1D p;
    DoubleReal b_pos(0.0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);
    for (Size i = 0; i != sequence.size() - 1; ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[sequence.size() - i - 1]);
      y_pos += aa_to_weight_[aa2];

      if (b_pos > min_mz_ && b_pos < max_mz_)
      {
        p.setPosition(b_pos + Constants::PROTON_MASS_U);
        p.setIntensity(1.0f);
        spec.push_back(p);
      }

      if (y_pos > min_mz_ && y_pos < max_mz_)
      {
        p.setPosition(y_pos + Constants::PROTON_MASS_U);
        p.setIntensity(1.0f);
        spec.push_back(p);
      }
    }

    spec.sortByPosition();
    return;
  }
Пример #5
0
  EmpiricalFormula IsotopeModel::getFormula()
  {
    CoordinateType mass = mean_ * charge_;

    Int C_num = Int(0.5 + mass * averagine_[C]);
    Int N_num = Int(0.5 + mass * averagine_[N]);
    Int O_num = Int(0.5 + mass * averagine_[O]);
    Int H_num = Int(0.5 + mass * averagine_[H]);
    Int S_num = Int(0.5 + mass * averagine_[S]);

    String form;
    if (C_num)
      form.append("C").append(String(C_num));
    if (H_num)
      form.append("H").append(String(H_num));
    if (N_num)
      form.append("N").append(String(N_num));
    if (O_num)
      form.append("O").append(String(O_num));
    if (S_num)
      form.append("S").append(String(S_num));

    return EmpiricalFormula(form);
  }
Пример #6
0
  Residue* ResidueDB::parseResidue_(Map<String, String>& values)
  {
    vector<EmpiricalFormula> low_mass_ions;
    Residue* res_ptr = new Residue();

    for (Map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
    {
      String key(it->first);
      String value(it->second);

      if (key.hasSuffix(":Name"))
      {
        res_ptr->setName(value);
        continue;
      }
      if (key.hasSuffix(":ShortName"))
      {
        res_ptr->setShortName(value);
        continue;
      }
      if (key.hasSuffix(":ThreeLetterCode"))
      {
        res_ptr->setThreeLetterCode(value);
        continue;
      }
      if (key.hasSuffix(":OneLetterCode"))
      {
        res_ptr->setOneLetterCode(value);
        continue;
      }
      if (key.hasSuffix(":Formula"))
      {
        EmpiricalFormula formula(value);
        res_ptr->setFormula(EmpiricalFormula(value));
        res_ptr->setAverageWeight(formula.getAverageWeight());
        res_ptr->setMonoWeight(formula.getMonoWeight());
        continue;
      }

      if (key.hasSubstring(":Losses:LossName"))
      {
        res_ptr->addLossName(value);
        continue;
      }
      if (key.hasSubstring(":Losses:LossFormula"))
      {
        EmpiricalFormula loss(value);
        res_ptr->addLossFormula(loss);
        continue;
      }

      if (key.hasSubstring("NTermLosses:LossName"))
      {
        res_ptr->addNTermLossName(value);
        continue;
      }

      if (key.hasSubstring("NTermLosses:LossFormula"))
      {
        EmpiricalFormula loss(value);
        res_ptr->addNTermLossFormula(loss);
        continue;
      }

      if (key.hasSubstring("LowMassIons"))
      {
        // no markers defined?
        if (!key.hasSuffix(":"))
        {
          low_mass_ions.push_back(EmpiricalFormula(value));
        }
        continue;
      }
      if (key.hasSubstring("Synonyms"))
      {
        // no synonyms defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->addSynonym(value);
        }
        continue;
      }
      if (key.hasSubstring("pka"))
      {
        // no pka defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->setPka(value.toDouble());
        }
        continue;
      }
      if (key.hasSubstring("pkb"))
      {
        // no pkb defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->setPkb(value.toDouble());
        }
        continue;
      }
      if (key.hasSubstring("pkc"))
      {
        // no pkc defined?
        if (!key.hasSuffix(":"))
        {
          res_ptr->setPkc(value.toDouble());
        }
        continue;
      }
      if (key.hasSubstring("GB_SC"))
      {
        res_ptr->setSideChainBasicity(value.toDouble());
        continue;
      }
      if (key.hasSubstring("GB_BB_L"))
      {
        res_ptr->setBackboneBasicityLeft(value.toDouble());
        continue;
      }
      if (key.hasSubstring("GB_BB_R"))
      {
        res_ptr->setBackboneBasicityRight(value.toDouble());
        continue;
      }
      if (key.hasSubstring("ResidueSets"))
      {
        StringList residue_sets = ListUtils::create<String>(value);
        for (StringList::const_iterator local_it = residue_sets.begin(); local_it != residue_sets.end(); ++local_it)
        {
          res_ptr->addResidueSet(*local_it);
          residue_sets_.insert(*local_it);
        }
        continue;
      }
      cerr << "unknown key: " << key << ", with value: " << value << endl;
    }

    if (!low_mass_ions.empty())
    {
      res_ptr->setLowMassIons(low_mass_ions);
    }

    for (set<String>::const_iterator it = res_ptr->getResidueSets().begin(); it != res_ptr->getResidueSets().end(); ++it)
    {
      residues_by_set_[*it].insert(res_ptr);
    }

    return res_ptr;
  }
  void TheoreticalSpectrumGenerator::addPrecursorPeaks(RichPeakSpectrum & spec, const AASequence & peptide, Int charge) const
  {
    RichPeak1D p;

    // precursor peak
    double mono_pos = peptide.getMonoWeight(Residue::Full, charge) / double(charge);
    if (add_isotopes_)
    {
      IsotopeDistribution dist = peptide.getFormula(Residue::Full, charge).getIsotopeDistribution(max_isotope_);
      UInt j(0);
      for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
      {
        p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge);
        p.setIntensity(pre_int_ *  it->second);
        if (add_metainfo_)
        {
          String name("[M+H]+");
          if (charge == 2)
          {
            name = "[M+2H]++";
          }
          p.setMetaValue("IonName", name);
        }
        spec.push_back(p);
      }
    }
    else
    {
      p.setMZ(mono_pos);
      p.setIntensity(pre_int_);
      if (add_metainfo_)
      {
        String name("[M+H]+");
        if (charge == 2)
        {
          name = "[M+2H]++";
        }
        p.setMetaValue("IonName", name);
      }
      spec.push_back(p);
    }
    // loss peaks of the precursor

    //loss of water
    EmpiricalFormula ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("H2O");
    mono_pos = ion.getMonoWeight() / double(charge);
    if (add_isotopes_)
    {
      IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope_);
      UInt j(0);
      for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
      {
        p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge);
        p.setIntensity(pre_int_H2O_ *  it->second);
        if (add_metainfo_)
        {
          String name("[M+H]-H2O+");
          if (charge == 2)
          {
            name = "[M+2H]-H2O++";
          }
          p.setMetaValue("IonName", name);
        }
        spec.push_back(p);
      }
    }
    else
    {
      p.setMZ(mono_pos);
      p.setIntensity(pre_int_H2O_);
      if (add_metainfo_)
      {
        String name("[M+H]-H2O+");
        if (charge == 2)
        {
          name = "[M+2H]-H2O++";
        }
        p.setMetaValue("IonName", name);
      }
      spec.push_back(p);
    }

    //loss of ammonia
    ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("NH3");
    mono_pos = ion.getMonoWeight() / double(charge);
    if (add_isotopes_)
    {
      IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope_);
      UInt j(0);
      for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
      {
        p.setMZ((double)(mono_pos + j * Constants::NEUTRON_MASS_U) / (double)charge);
        p.setIntensity(pre_int_NH3_ *  it->second);
        if (add_metainfo_)
        {
          String name("[M+H]-NH3+");
          if (charge == 2)
          {
            name = "[M+2H]-NH3++";
          }
          p.setMetaValue("IonName", name);
        }
        spec.push_back(p);
      }
    }
    else
    {
      p.setMZ(mono_pos);
      p.setIntensity(pre_int_NH3_);
      if (add_metainfo_)
      {
        String name("[M+H]-NH3+");
        if (charge == 2)
        {
          name = "[M+2H]-NH3++";
        }
        p.setMetaValue("IonName", name);
      }
      spec.push_back(p);
    }

    spec.sortByPosition();
  }
  void TheoreticalSpectrumGenerator::addLosses_(RichPeakSpectrum & spectrum, const AASequence & ion, double intensity, Residue::ResidueType res_type, int charge) const 
  {
    RichPeak1D p;

    set<String> losses;
    for (AASequence::ConstIterator it = ion.begin(); it != ion.end(); ++it)
    {
      if (it->hasNeutralLoss())
      {
        vector<EmpiricalFormula> loss_formulas = it->getLossFormulas();
        for (Size i = 0; i != loss_formulas.size(); ++i)
        {
          losses.insert(loss_formulas[i].toString());
        }
      }
    }

    if (!add_isotopes_)
    {
      p.setIntensity(intensity * rel_loss_intensity_);
    }

    for (set<String>::const_iterator it = losses.begin(); it != losses.end(); ++it)
    {
      EmpiricalFormula loss_ion = ion.getFormula(res_type, charge) - EmpiricalFormula(*it);
      // thanks to Chris and Sandro
      // check for negative element frequencies (might happen if losses are not allowed for specific ions)
      bool negative_elements(false);
      for (EmpiricalFormula::ConstIterator eit = loss_ion.begin(); eit != loss_ion.end(); ++eit)
      {
        if (eit->second < 0)
        {
          negative_elements = true;
          break;
        }
      }
      if (negative_elements)
      {
        continue;
      }
      double loss_pos = loss_ion.getMonoWeight() / (double)charge;
      const String& loss_name = *it;

      if (add_isotopes_)
      {
        IsotopeDistribution dist = loss_ion.getIsotopeDistribution(max_isotope_);
        UInt j(0);
        for (IsotopeDistribution::ConstIterator iso = dist.begin(); iso != dist.end(); ++iso)
        {
          p.setMZ((double)(loss_pos + j) / (double)charge);
          p.setIntensity(intensity * rel_loss_intensity_ * iso->second);
          if (add_metainfo_ && j == 0)
          {
            // note: important to construct a string from char. If omitted it will perform pointer arithmetics on the "-" string literal
            String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+');
            p.setMetaValue("IonName", ion_name);
          }
          spectrum.push_back(p);
        }
      }
      else
      {
        p.setMZ(loss_pos);
        if (add_metainfo_)
        {
          // note: important to construct a string from char. If omitted it will perform pointer arithmetics on the "-" string literal
          String ion_name = String(residueTypeToIonLetter_(res_type)) + String(ion.size()) + "-" + loss_name + String(charge, '+');
          p.setMetaValue("IonName", ion_name);
        }
        spectrum.push_back(p);
      }
    }

  }
Пример #9
0
// divide and conquer algorithm of the sequencing
void CompNovoIdentificationCID::getDecompositionsDAC_(set<String> & sequences, Size left, Size right, double peptide_weight, const PeakSpectrum & CID_spec, Map<double, CompNovoIonScoringCID::IonScore> & ion_scores)
{
    static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight();
    double offset_suffix(CID_spec[left].getPosition()[0] - oxonium_mass);
    double offset_prefix(peptide_weight - CID_spec[right].getPosition()[0]);

#ifdef DAC_DEBUG
    static Int depth_(0);
    ++depth_;
    String tabs_(depth_, '\t');
    cerr << tabs_ << "void getDecompositionsDAC(sequences[" << sequences.size() << "], " << left << ", " << right << ") ";
    cerr << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " diff=";
#endif

    double diff = CID_spec[right].getPosition()[0] - CID_spec[left].getPosition()[0];

#ifdef DAC_DEBUG
    cerr << diff << endl;
    cerr << "offset_prefix=" << offset_prefix << ", offset_suffix=" << offset_suffix << endl;
#endif

    if (subspec_to_sequences_.has(left) && subspec_to_sequences_[left].has(right))
    {
        sequences = subspec_to_sequences_[left][right];

#ifdef DAC_DEBUG
        depth_--;
        cerr << tabs_ << "from cache DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << " " << left << " " << right << endl;
#endif
        return;
    }

    // no further solutions possible?
    if (diff < min_aa_weight_)
    {
#ifdef DAC_DEBUG
        depth_--;
#endif
        return;
    }

    // no further division needed?
    if (diff <= max_decomp_weight_)
    {
        vector<MassDecomposition> decomps;

        // if we are at the C-terminus use precursor_mass_tolerance_
        if (offset_prefix < precursor_mass_tolerance_)
        {
            Param decomp_param(mass_decomp_algorithm_.getParameters());
            decomp_param.setValue("tolerance", precursor_mass_tolerance_);
            mass_decomp_algorithm_.setParameters(decomp_param);
            getDecompositions_(decomps, diff);
            decomp_param.setValue("tolerance", fragment_mass_tolerance_);
            mass_decomp_algorithm_.setParameters(decomp_param);
        }
        else
        {
            getDecompositions_(decomps, diff);
        }
        //filterDecomps_(decomps);

#ifdef DAC_DEBUG
        cerr << tabs_ << "Found " << decomps.size() << " decomps" << endl;
        cerr << tabs_ << "Permuting...";
#endif

        //static Map<String, set<String> > permute_cache;
        for (vector<MassDecomposition>::const_iterator it = decomps.begin(); it != decomps.end(); ++it)
        {
#ifdef DAC_DEBUG
            cerr << it->toString() << endl;
#endif

            String exp_string = it->toExpandedString();
            if (!permute_cache_.has(exp_string))
            {
                permute_("", exp_string, sequences);
                permute_cache_[exp_string] = sequences;
            }
            else
            {
                sequences = permute_cache_[exp_string];
            }
        }

#ifdef DAC_DEBUG
        cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl;
        if (sequences.size() > max_subscore_number_)
        {
            cerr << tabs_ << "Reducing #sequences from " << sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix  << ", suffix=" << offset_suffix << ")...";
        }
#endif

        // C-terminus
        if (offset_suffix <= precursor_mass_tolerance_)
        {
            filterPermuts_(sequences);
        }

        // reduce the sequences
        reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix);
#ifdef DAC_DEBUG
        cerr << "Writing to cache " << left << " " << right << endl;
#endif
        subspec_to_sequences_[left][right] = sequences;

#ifdef DAC_DEBUG
        cerr << "ended" << endl;
        cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl;
        depth_--;
#endif

        return;
    }

    // select suitable pivot peaks
    vector<Size> pivots;

    if (offset_suffix < precursor_mass_tolerance_ && offset_prefix < precursor_mass_tolerance_)
    {
        selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, true);
    }
    else
    {
        selectPivotIons_(pivots, left, right, ion_scores, CID_spec, peptide_weight, false);
    }

    // run divide step
#ifdef DAC_DEBUG
    cerr << tabs_ << "Selected " << pivots.size() << " pivot ions: ";
    for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it)
    {
        cerr << *it << "(" << CID_spec[*it].getPosition()[0] << ") ";
    }
    cerr << endl;
#endif

    for (vector<Size>::const_iterator it = pivots.begin(); it != pivots.end(); ++it)
    {
        set<String> seq1, seq2, new_sequences;

        // the smaller the 'gap' the greater the chance of not finding anything
        // so we we compute the smaller gap first
        double diff1(CID_spec[*it].getPosition()[0] - CID_spec[left].getPosition()[0]);
        double diff2(CID_spec[right].getPosition()[0] - CID_spec[*it].getPosition()[0]);

        if (diff1 < diff2)
        {
            getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores);
            if (seq1.empty())
            {
#ifdef DAC_DEBUG
                cerr << tabs_ << "first call produced 0 candidates (" << diff1 << ")" << endl;
#endif
                continue;
            }

            getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores);
        }
        else
        {
            getDecompositionsDAC_(seq2, *it, right, peptide_weight, CID_spec, ion_scores);
            if (seq2.empty())
            {
#ifdef DAC_DEBUG
                cerr << tabs_ << "second call produced 0 candidates (" << diff2 << ")" << endl;
#endif
                continue;
            }

            getDecompositionsDAC_(seq1, left, *it, peptide_weight, CID_spec, ion_scores);
        }

#ifdef DAC_DEBUG
        cerr << tabs_ << "Found " << seq1.size() << " solutions (1) " << diff1 << endl;
        cerr << tabs_ << "Found " << seq2.size() << " solutions (2) " << diff2 << endl;
        cerr << tabs_ << "inserting " << seq1.size() * seq2.size()  << " sequences" << endl;
#endif

        // C-terminus
        if (offset_suffix <= fragment_mass_tolerance_)
        {
            filterPermuts_(seq1);
        }

        // test if we found enough sequence candidates
        if (seq1.empty() || seq2.empty())
        {
            continue;
        }

        for (set<String>::const_iterator it1 = seq1.begin(); it1 != seq1.end(); ++it1)
        {
            for (set<String>::const_iterator it2 = seq2.begin(); it2 != seq2.end(); ++it2)
            {
                new_sequences.insert(*it2 + *it1);
            }
        }

        if (seq1.size() * seq2.size() > max_subscore_number_ /* && (offset_prefix > fragment_mass_tolerance_ || offset_suffix > fragment_mass_tolerance_)*/)
        {
#ifdef DAC_DEBUG
            cerr << tabs_ << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << peptide_weight << endl;
            cerr << tabs_ << "Reducing #sequences from " << new_sequences.size() << " to " << max_subscore_number_ << "(prefix=" << offset_prefix  << ", suffix=" << offset_suffix << ")...";
#endif
            if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_)
            {
                reducePermuts_(new_sequences, CID_spec, offset_prefix, offset_suffix);
            }

#ifdef DAC_DEBUG
            for (set<String>::const_iterator it1 = new_sequences.begin(); it1 != new_sequences.end(); ++it1)
            {
                cerr << tabs_ << *it1 << endl;
            }
            cerr << endl;
#endif
        }

        for (set<String>::const_iterator sit = new_sequences.begin(); sit != new_sequences.end(); ++sit)
        {
            sequences.insert(*sit);
        }
    }
#ifdef DAC_DEBUG
    cerr << tabs_ << "Found sequences for " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << endl;
    for (set<String>::const_iterator sit = sequences.begin(); sit != sequences.end(); ++sit)
    {
        cerr << tabs_ << *sit << endl;
    }
#endif

    // reduce the permuts once again to reduce complexity
    if (offset_prefix > precursor_mass_tolerance_ || offset_suffix > precursor_mass_tolerance_)
    {
        reducePermuts_(sequences, CID_spec, offset_prefix, offset_suffix);
    }

#ifdef DAC_DEBUG
    cerr << "Writing to cache " << left << " " << right << endl;
#endif

    subspec_to_sequences_[left][right] = sequences;

#ifdef DAC_DEBUG
    depth_--;
    cerr << tabs_ << "DAC: " << CID_spec[left].getPosition()[0] << " " << CID_spec[right].getPosition()[0] << " " << sequences.size() << endl;
#endif
    return;

}
Пример #10
0
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec)
{
    //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0)
    //{
    //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl;
    //return;
    //}

    PeakSpectrum new_CID_spec(CID_spec);
    windowMower_(new_CID_spec, 0.3, 1);

    Param zhang_param;
    zhang_param = zhang_.getParameters();
    zhang_param.setValue("tolerance", fragment_mass_tolerance_);
    zhang_param.setValue("use_gaussian_factor", "true");
    zhang_param.setValue("use_linear_factor", "false");
    zhang_.setParameters(zhang_param);


    Normalizer normalizer;
    Param n_param(normalizer.getParameters());
    n_param.setValue("method", "to_one");
    normalizer.setParameters(n_param);
    normalizer.filterSpectrum(new_CID_spec);

    Size charge(2);
    double precursor_weight(0);     // [M+H]+
    if (!CID_spec.getPrecursors().empty())
    {
        // believe charge of spectrum?
        if (CID_spec.getPrecursors().begin()->getCharge() != 0)
        {
            charge = CID_spec.getPrecursors().begin()->getCharge();
        }
        else
        {
            // TODO estimate charge state
        }
        precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U);
    }

    //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl;

    // now delete all peaks that are right of the estimated precursor weight
    Size peak_counter(0);
    for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter)
    {
        if (it->getPosition()[0] > precursor_weight)
        {
            break;
        }
    }
    if (peak_counter < new_CID_spec.size())
    {
        new_CID_spec.resize(peak_counter);
    }


    static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight();

    Peak1D p;
    p.setIntensity(1);
    p.setPosition(oxonium_mass);

    new_CID_spec.push_back(p);

    p.setPosition(precursor_weight);
    new_CID_spec.push_back(p);

    // add complement to spectrum
    /*
    for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
    {
    // get m/z of complement
    double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U;

    // search if peaks are available that have similar m/z values
    Size count(0);
    bool found(false);
    for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count)
    {
    if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance)
    {
      // add peak intensity to corresponding peak in new_CID_spec
      new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity());
    }
    }
    if (!found)
    {
    // infer this peak
    Peak1D p;
    p.setIntensity(it1->getIntensity());
    p.setPosition(mz_comp);
    new_CID_spec.push_back(p);
    }
    }*/

    CompNovoIonScoringCID ion_scoring;
    Param ion_scoring_param(ion_scoring.getParameters());
    ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_);
    ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_);
    ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_);
    ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold"));
    ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score"));
    ion_scoring_param.setValue("max_isotope", max_isotope_);
    ion_scoring.setParameters(ion_scoring_param);

    Map<double, IonScore> ion_scores;
    ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge);

    new_CID_spec.sortByPosition();

    /*
    cerr << "Size of ion_scores " << ion_scores.size() << endl;
    for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        cerr << it->first << " " << it->second.score << endl;
    }*/

#ifdef WRITE_SCORED_SPEC
    PeakSpectrum filtered_spec(new_CID_spec);
    filtered_spec.clear();
    for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        Peak1D p;
        p.setIntensity(it->second.score);
        p.setPosition(it->first);
        filtered_spec.push_back(p);
    }
    DTAFile().store("spec_scored.dta", filtered_spec);
#endif

    set<String> sequences;
    getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores);

#ifdef SPIKE_IN
    sequences.insert("AFCVDGEGR");
    sequences.insert("APEFAAPWPDFVPR");
    sequences.insert("AVKQFEESQGR");
    sequences.insert("CCTESLVNR");
    sequences.insert("DAFLGSFLYEYSR");
    sequences.insert("DAIPENLPPLTADFAEDK");
    sequences.insert("DDNKVEDIWSFLSK");
    sequences.insert("DDPHACYSTVFDK");
    sequences.insert("DEYELLCLDGSR");
    sequences.insert("DGAESYKELSVLLPNR");
    sequences.insert("DGASCWCVDADGR");
    sequences.insert("DLFIPTCLETGEFAR");
    sequences.insert("DTHKSEIAHR");
    sequences.insert("DVCKNYQEAK");
    sequences.insert("EACFAVEGPK");
    sequences.insert("ECCHGDLLECADDR");
    sequences.insert("EFLGDKFYTVISSLK");
    sequences.insert("EFTPVLQADFQK");
    sequences.insert("ELFLDSGIFQPMLQGR");
    sequences.insert("ETYGDMADCCEK");
    sequences.insert("EVGCPSSSVQEMVSCLR");
    sequences.insert("EYEATLEECCAK");
    sequences.insert("FADLIQSGTFQLHLDSK");
    sequences.insert("FFSASCVPGATIEQK");
    sequences.insert("FLANVSTVLTSK");
    sequences.insert("FLSGSDYAIR");
    sequences.insert("FTASCPPSIK");
    sequences.insert("GAIEWEGIESGSVEQAVAK");
    sequences.insert("GDVAFIQHSTVEENTGGK");
    sequences.insert("GEPPSCAEDQSCPSER");
    sequences.insert("GEYVPTSLTAR");
    sequences.insert("GQEFTITGQKR");
    sequences.insert("GTFAALSELHCDK");
    sequences.insert("HLVDEPQNLIK");
    sequences.insert("HQDCLVTTLQTQPGAVR");
    sequences.insert("HTTVNENAPDQK");
    sequences.insert("ILDCGSPDTEVR");
    sequences.insert("KCPSPCQLQAER");
    sequences.insert("KGTEFTVNDLQGK");
    sequences.insert("KQTALVELLK");
    sequences.insert("KVPQVSTPTLVEVSR");
    sequences.insert("LALQFTTNAKR");
    sequences.insert("LCVLHEKTPVSEK");
    sequences.insert("LFTFHADICTLPDTEK");
    sequences.insert("LGEYGFQNALIVR");
    sequences.insert("LHVDPENFK");
    sequences.insert("LKECCDKPLLEK");
    sequences.insert("LKHLVDEPQNLIK");
    sequences.insert("LKPDPNTLCDEFK");
    sequences.insert("LLGNVLVVVLAR");
    sequences.insert("LLVVYPWTQR");
    sequences.insert("LRVDPVNFK");
    sequences.insert("LTDEELAFPPLSPSR");
    sequences.insert("LVNELTEFAK");
    sequences.insert("MFLSFPTTK");
    sequences.insert("MPCTEDYLSLILNR");
    sequences.insert("NAPYSGYSGAFHCLK");
    sequences.insert("NECFLSHKDDSPDLPK");
    sequences.insert("NEPNKVPACPGSCEEVK");
    sequences.insert("NLQMDDFELLCTDGR");
    sequences.insert("QAGVQAEPSPK");
    sequences.insert("RAPEFAAPWPDFVPR");
    sequences.insert("RHPEYAVSVLLR");
    sequences.insert("RPCFSALTPDETYVPK");
    sequences.insert("RSLLLAPEEGPVSQR");
    sequences.insert("SAFPPEPLLCSVQR");
    sequences.insert("SAGWNIPIGTLLHR");
    sequences.insert("SCWCVDEAGQK");
    sequences.insert("SGNPNYPHEFSR");
    sequences.insert("SHCIAEVEK");
    sequences.insert("SISSGFFECER");
    sequences.insert("SKYLASASTMDHAR");
    sequences.insert("SLHTLFGDELCK");
    sequences.insert("SLLLAPEEGPVSQR");
    sequences.insert("SPPQCSPDGAFRPVQCK");
    sequences.insert("SREGDPLAVYLK");
    sequences.insert("SRQIPQCPTSCER");
    sequences.insert("TAGTPVSIPVCDDSSVK");
    sequences.insert("TCVADESHAGCEK");
    sequences.insert("TQFGCLEGFGR");
    sequences.insert("TVMENFVAFVDK");
    sequences.insert("TYFPHFDLSHGSAQVK");
    sequences.insert("TYMLAFDVNDEK");
    sequences.insert("VDEVGGEALGR");
    sequences.insert("VDLLIGSSQDDGLINR");
    sequences.insert("VEDIWSFLSK");
    sequences.insert("VGGHAAEYGAEALER");
    sequences.insert("VGTRCCTKPESER");
    sequences.insert("VKVDEVGGEALGR");
    sequences.insert("VKVDLLIGSSQDDGLINR");
    sequences.insert("VLDSFSNGMK");
    sequences.insert("VLSAADKGNVK");
    sequences.insert("VPQVSTPTLVEVSR");
    sequences.insert("VTKCCTESLVNR");
    sequences.insert("VVAASDASQDALGCVK");
    sequences.insert("VVAGVANALAHR");
    sequences.insert("YICDNQDTISSK");
    sequences.insert("YLASASTMDHAR");
    sequences.insert("YNGVFQECCQAEDK");
#endif

    SpectrumAlignmentScore spectra_zhang;
    spectra_zhang.setParameters(zhang_param);

    vector<PeptideHit> hits;
    Size missed_cleavages = param_.getValue("missed_cleavages");
    for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it)
    {

        Size num_missed = countMissedCleavagesTryptic_(*it);
        if (missed_cleavages < num_missed)
        {
            //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl;
            continue;
        }
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, *it, charge);

        //normalizer.filterSpectrum(CID_sim_spec);

        double cid_score = zhang_(CID_sim_spec, CID_spec);

        PeptideHit hit;
        hit.setScore(cid_score);

        hit.setSequence(getModifiedAASequence_(*it));
        hit.setCharge((Int)charge);   //TODO unify charge interface: int or size?
        hits.push_back(hit);
        //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl;
    }

    // rescore the top hits
    id.setHits(hits);
    id.assignRanks();

    hits = id.getHits();

    SpectrumAlignmentScore alignment_score;
    Param align_param(alignment_score.getParameters());
    align_param.setValue("tolerance", fragment_mass_tolerance_);
    align_param.setValue("use_linear_factor", "true");
    alignment_score.setParameters(align_param);

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits");
    if (hits.size() > number_of_prescoring_hits)
    {
        hits.resize(number_of_prescoring_hits);
    }

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge);

        normalizer.filterSpectrum(CID_sim_spec);

        //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec);

        //double cid_score = spectra_zhang(CID_sim_spec, CID_spec);
        double cid_score = alignment_score(CID_sim_spec, CID_spec);

        //cerr << "Final: " << it->getSequence() << " " << cid_score << endl;

        it->setScore(cid_score);
    }

    id.setHits(hits);
    id.assignRanks();
    hits = id.getHits();

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_hits = param_.getValue("number_of_hits");
    if (id.getHits().size() > number_of_hits)
    {
        hits.resize(number_of_hits);
    }

    id.setHits(hits);
    id.assignRanks();

    return;
}
Пример #11
0
  void InspectInfile::handlePTMs(const String& modification_line, const String& modifications_filename, const bool monoisotopic)
  {
    PTMname_residues_mass_type_.clear();
    // to store the information about modifications from the ptm xml file
    std::map<String, pair<String, String> > ptm_informations;
    if (!modification_line.empty()) // if modifications are used look whether whether composition and residues (and type and name) is given, the name (type) is used (then the modifications file is needed) or only the mass and residues (and type and name) is given
    {
      vector<String> modifications, mod_parts;
      modification_line.split(':', modifications); // get the single modifications
      if (modifications.empty())
        modifications.push_back(modification_line);

      // to get masses from a formula
      EmpiricalFormula add_formula, substract_formula;

      String types = "OPT#FIX#";
      String name, residues, mass, type;

      // 0 - mass; 1 - composition; 2 - ptm name
      Int mass_or_composition_or_name(-1);

      for (vector<String>::const_iterator mod_i = modifications.begin(); mod_i != modifications.end(); ++mod_i)
      {
        if (mod_i->empty())
        {
          continue;
        }
        // clear the formulae
        add_formula = substract_formula = EmpiricalFormula();
        name = residues = mass = type = "";

        // get the single parts of the modification string
        mod_i->split(',', mod_parts);
        if (mod_parts.empty())
          mod_parts.push_back(*mod_i);
        mass_or_composition_or_name = -1;

        // check whether the first part is a mass, composition or name

        // check whether it is a mass
        try
        {
          mass = mod_parts.front();
          // to check whether the first part is a mass, it is converted into a float and then back into a string and compared to the given string
          // remove + signs because they don't appear in a float
          if (mass.hasPrefix("+"))
            mass.erase(0, 1);
          if (mass.hasSuffix("+"))
            mass.erase(mass.length() - 1, 1);
          if (mass.hasSuffix("-")) // a - sign at the end will not be converted
          {
            mass.erase(mass.length() - 1, 1);
            mass.insert(0, "-");
          }
          // if it is a mass
          if (String(mass.toFloat()) == mass)
            mass_or_composition_or_name = 0;
        }
        catch (Exception::ConversionError& /*c_e*/)
        {
          mass_or_composition_or_name = -1;
        }

        // check whether it is a name (look it up in the corresponding file)
        if (mass_or_composition_or_name == -1)
        {
          if (ptm_informations.empty()) // if the ptm xml file has not been read yet, read it
          {
            if (!File::exists(modifications_filename))
            {
              throw Exception::FileNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, modifications_filename);
            }
            if (!File::readable(modifications_filename))
            {
              throw Exception::FileNotReadable(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, modifications_filename);
            }

            // getting all available modifications from a file
            PTMXMLFile().load(modifications_filename, ptm_informations);
          }
          // if the modification cannot be found
          if (ptm_informations.find(mod_parts.front()) != ptm_informations.end())
          {
            mass = ptm_informations[mod_parts.front()].first; // composition
            residues = ptm_informations[mod_parts.front()].second; // residues
            name = mod_parts.front(); // name

            mass_or_composition_or_name = 2;
          }
        }

        // check whether it's an empirical formula / if a composition was given, get the mass
        if (mass_or_composition_or_name == -1)
          mass = mod_parts.front();
        if (mass_or_composition_or_name == -1 || mass_or_composition_or_name == 2)
        {
          // check whether there is a positive and a negative formula
          String::size_type pos = mass.find("-");
          try
          {
            if (pos != String::npos)
            {
              add_formula = EmpiricalFormula(mass.substr(0, pos));
              substract_formula = EmpiricalFormula(mass.substr(++pos));
            }
            else
            {
              add_formula = EmpiricalFormula(mass);
            }
            // sum up the masses
            if (monoisotopic)
              mass = String(add_formula.getMonoWeight() - substract_formula.getMonoWeight());
            else
              mass = String(add_formula.getAverageWeight() - substract_formula.getAverageWeight());
            if (mass_or_composition_or_name == -1)
              mass_or_composition_or_name = 1;
          }
          catch (Exception::ParseError& /*pe*/)
          {
            PTMname_residues_mass_type_.clear();
            throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, *mod_i, "There's something wrong with this modification. Aborting!");
          }
        }

        // now get the residues
        mod_parts.erase(mod_parts.begin());
        if (mass_or_composition_or_name < 2)
        {
          if (mod_parts.empty())
          {
            PTMname_residues_mass_type_.clear();
            throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, *mod_i, "No residues for modification given. Aborting!");
          }

          // get the residues
          residues = mod_parts.front();
          residues.substitute('*', 'X');
          residues.toUpper();
          mod_parts.erase(mod_parts.begin());
        }

        // get the type
        if (mod_parts.empty())
          type = "OPT";
        else
        {
          type = mod_parts.front();
          type.toUpper();
          if (types.find(type) != String::npos)
            mod_parts.erase(mod_parts.begin());
          else
            type = "OPT";
        }

        if (mod_parts.size() > 1)
        {
          PTMname_residues_mass_type_.clear();
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, *mod_i, "There's something wrong with the type of this modification. Aborting!");
        }

        // get the name
        if (mass_or_composition_or_name < 2)
        {
          if (mod_parts.empty())
            name = "PTM_" + String(PTMname_residues_mass_type_.size());
          else
            name = mod_parts.front();
        }

        // insert the modification
        if (PTMname_residues_mass_type_.find(name) == PTMname_residues_mass_type_.end())
        {
          PTMname_residues_mass_type_[name] = vector<String>(3);
          PTMname_residues_mass_type_[name][0] = residues;
          // mass must not have more than 5 digits after the . (otherwise the test may fail)
          PTMname_residues_mass_type_[name][1] = mass.substr(0, mass.find(".") + 6);
          PTMname_residues_mass_type_[name][2] = type;
        }
        else
        {
          PTMname_residues_mass_type_.clear();
          throw Exception::ParseError(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, *mod_i, "There's already a modification with this name. Aborting!");
        }
      }
    }
  }
  void CompNovoIdentificationBase::getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, DoubleReal prefix, DoubleReal suffix)
  {
    static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
    static DoubleReal nh3_mass = EmpiricalFormula("NH3").getMonoWeight();
    static DoubleReal co_mass = EmpiricalFormula("CO").getMonoWeight();
    Peak1D p;
    DoubleReal b_pos(0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);
    bool b_H2O_loss(false), b_NH3_loss(false), y_NH3_loss(false);

    for (Size i = 0; i != sequence.size() - 1; ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[sequence.size() - i - 1]);
      y_pos += aa_to_weight_[aa2];
      for (Size z = 1; z <= charge && z < 3; ++z)
      {
        // b-ions
        if (b_pos >= min_mz_ && b_pos <= max_mz_)
        {
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /*|| b_pos > MIN_DOUBLE_MZ*/)
            {
              p.setPosition((b_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j + Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)b_pos][j] * 0.8 / (z * z));
              spec.push_back(p);
            }
          }
        }

        // b-ion losses
        if (b_pos - h2o_mass > min_mz_ && b_pos - h2o_mass < max_mz_)
        {
          if (b_H2O_loss || aa == 'S' || aa == 'T' || aa == 'E' || aa == 'D')
          {
            b_H2O_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - h2o_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));
            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
          if (b_NH3_loss || aa == 'Q' || aa == 'N' || aa == 'R' || aa == 'K')
          {
            b_NH3_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - nh3_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));

            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }

        // a-ions only for charge 1
        if (z == 1)
        {
          if (b_pos - co_mass > min_mz_ && b_pos - co_mass < max_mz_)
          {
            // a-ions
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - co_mass) / (DoubleReal)z);
            p.setIntensity(0.1f);
            spec.push_back(p);
          }
        }



        if (y_pos > min_mz_ && y_pos < max_mz_)
        {
          // y-ions
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
            {
              p.setPosition((y_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)y_pos][j] / (DoubleReal) (z * z));
              spec.push_back(p);
            }
          }

          // H2O loss
          p.setPosition((y_pos + z * Constants::PROTON_MASS_U - h2o_mass) / (DoubleReal)z);
          p.setIntensity(0.1 / (DoubleReal)(z * z));
          if (aa2 == 'Q')           // pyroglutamic acid formation
          {
            p.setIntensity(0.5f);
          }
          if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
          {
            spec.push_back(p);
          }

          // NH3 loss
          if (y_NH3_loss || aa2 == 'Q' || aa2 == 'N' || aa2 == 'R' || aa2 == 'K')
          {
            y_NH3_loss = true;
            p.setPosition((y_pos + z * Constants::PROTON_MASS_U - nh3_mass) / (DoubleReal)z);
            p.setIntensity(0.1 / (DoubleReal)(z * z));

            if (z == 1 /*|| y_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }
      }
    }

    // if Q1 abundant loss of water -> pyroglutamic acid formation

    if (sequence[0] == 'Q' && prefix == 0 && suffix == 0)
    {
      /*
      for (PeakSpectrum::Iterator it = spec.begin(); it != spec.end(); ++it)
      {
          it->setIntensity(it->getIntensity() * 0.5);
      }*/

      /*
      for (Size j = 0; j != max_isotope; ++j)
      {
  p.setPosition((precursor_weight + charge - 1 + j)/(DoubleReal)charge);
  p.setIntensity(isotope_distributions_[(Int)p.getPosition()[0]][j] * 0.1);
  spec.push_back(p);
      }
      */
    }


    spec.sortByPosition();

    return;
  }
Пример #13
0
  const Enzyme* EnzymesDB::parseEnzyme_(Map<String, String>& values) const
  {
    Enzyme* enzy_ptr = new Enzyme("unknown_enzyme", "");

    for (Map<String, String>::iterator it = values.begin(); it != values.end(); ++it)
    {
      String key(it->first);
      String value(it->second);

      if (key.hasSuffix(":Name"))
      {
        enzy_ptr->setName(value);
        continue;
      }
      if (key.hasSuffix(":RegEx"))
      {
        enzy_ptr->setRegEx(value);
        continue;
      }
      if (key.hasSuffix(":RegExDescription"))
      {
        enzy_ptr->setRegExDescription(value);
        continue;
      }
      if (key.hasSuffix(":NTermGain"))
     {
        enzy_ptr->setNTermGain(EmpiricalFormula(value));
        continue;
      }
      if (key.hasSuffix(":CTermGain"))
      {
        enzy_ptr->setCTermGain(EmpiricalFormula(value));
        continue;
      }
      if (key.hasSubstring("PSIid"))
      {
        // no PSIid defined?
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->setPSIid(value);
        }
        continue;
      }
      if (key.hasSubstring("XTANDEMid"))
      {
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->setXTANDEMid(value);
        }
        continue;
      }
      if (key.hasSubstring("OMSSAid"))
      {
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->setOMSSAid(value.toInt());
        }
        continue;
      }
      if (key.hasSubstring("Synonyms"))
      {
        // no synonyms defined?
        if (!key.hasSuffix(":"))
        {
          enzy_ptr->addSynonym(value);
        }
        continue;
      }
      cerr << "unknown key: " << key << ", with value: " << value << endl;
    }
    return enzy_ptr;
  }
  void TheoreticalSpectrumGenerator::addPrecursorPeaks(RichPeakSpectrum & spec, const AASequence & peptide, Int charge)
  {
    bool add_metainfo(param_.getValue("add_metainfo").toBool());
    DoubleReal pre_int((DoubleReal)param_.getValue("precursor_intensity"));
    DoubleReal pre_int_H2O((DoubleReal)param_.getValue("precursor_H2O_intensity"));
    DoubleReal pre_int_NH3((DoubleReal)param_.getValue("precursor_NH3_intensity"));
    bool add_isotopes(param_.getValue("add_isotopes").toBool());
    int max_isotope((int)param_.getValue("max_isotope"));

    // precursor peak
    DoubleReal mono_pos = peptide.getMonoWeight(Residue::Full, charge) / DoubleReal(charge);
    if (add_isotopes)
    {
      IsotopeDistribution dist = peptide.getFormula(Residue::Full, charge).getIsotopeDistribution(max_isotope);
      UInt j(0);
      for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
      {
        p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge);
        p_.setIntensity(pre_int *  it->second);
        if (add_metainfo)
        {
          String name("[M+H]+");
          if (charge == 2)
          {
            name = "[M+2H]++";
          }
          p_.setMetaValue("IonName", name);
        }
        spec.push_back(p_);
      }
    }
    else
    {
      p_.setMZ(mono_pos);
      p_.setIntensity(pre_int);
      if (add_metainfo)
      {
        String name("[M+H]+");
        if (charge == 2)
        {
          name = "[M+2H]++";
        }
        p_.setMetaValue("IonName", name);
      }
      spec.push_back(p_);
    }
    // loss peaks of the precursor

    //loss of water
    EmpiricalFormula ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("H2O");
    mono_pos = ion.getMonoWeight() / DoubleReal(charge);
    if (add_isotopes)
    {
      IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope);
      UInt j(0);
      for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
      {
        p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge);
        p_.setIntensity(pre_int_H2O *  it->second);
        if (add_metainfo)
        {
          String name("[M+H]-H2O+");
          if (charge == 2)
          {
            name = "[M+2H]-H2O++";
          }
          p_.setMetaValue("IonName", name);
        }
        spec.push_back(p_);
      }
    }
    else
    {
      p_.setMZ(mono_pos);
      p_.setIntensity(pre_int_H2O);
      if (add_metainfo)
      {
        String name("[M+H]-H2O+");
        if (charge == 2)
        {
          name = "[M+2H]-H2O++";
        }
        p_.setMetaValue("IonName", name);
      }
      spec.push_back(p_);
    }

    //loss of ammonia
    ion = peptide.getFormula(Residue::Full, charge) - EmpiricalFormula("NH3");
    mono_pos = ion.getMonoWeight() / DoubleReal(charge);
    if (add_isotopes)
    {
      IsotopeDistribution dist = ion.getIsotopeDistribution(max_isotope);
      UInt j(0);
      for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
      {
        p_.setMZ((DoubleReal)(mono_pos + j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge);
        p_.setIntensity(pre_int_NH3 *  it->second);
        if (add_metainfo)
        {
          String name("[M+H]-NH3+");
          if (charge == 2)
          {
            name = "[M+2H]-NH3++";
          }
          p_.setMetaValue("IonName", name);
        }
        spec.push_back(p_);
      }
    }
    else
    {
      p_.setMZ(mono_pos);
      p_.setIntensity(pre_int_NH3);
      if (add_metainfo)
      {
        String name("[M+H]-NH3+");
        if (charge == 2)
        {
          name = "[M+2H]-NH3++";
        }
        p_.setMetaValue("IonName", name);
      }
      spec.push_back(p_);
    }

    spec.sortByPosition();
  }
  void TheoreticalSpectrumGenerator::addPeaks(RichPeakSpectrum & spectrum, const AASequence & peptide, Residue::ResidueType res_type, Int charge)
  {
    if (peptide.empty())
    {
      return;
    }

    Map<DoubleReal, AASequence> ions;
    Map<DoubleReal, String> names;
    AASequence ion;
    DoubleReal intensity(0);
    bool add_first_prefix_ion(param_.getValue("add_first_prefix_ion").toBool());

    // generate the ion peaks
    switch (res_type)
    {
    case Residue::AIon:
    {
      Size i = 1;
      if (!add_first_prefix_ion)
      {
        i = 2;
      }
      for (; i < peptide.size(); ++i)
      {
        ion = peptide.getPrefix(i);
        DoubleReal pos = ion.getMonoWeight(Residue::AIon, charge) / (DoubleReal)charge;
        ions[pos] = ion;
        names[pos] = "a" + String(i) + String(charge, '+');
      }
      intensity = (DoubleReal)param_.getValue("a_intensity");
      break;
    }

    case Residue::BIon:
    {
      Size i = 1;
      if (!add_first_prefix_ion)
      {
        i = 2;
      }
      for (; i < peptide.size(); ++i)
      {
        ion = peptide.getPrefix(i);
        DoubleReal pos = ion.getMonoWeight(Residue::BIon, charge) / (DoubleReal)charge;
        ions[pos] = ion;
        names[pos] = "b" + String(i) + String(charge, '+');
      }
      intensity = (DoubleReal)param_.getValue("b_intensity");
      break;
    }

    case Residue::CIon:
    {
      Size i = 1;
      if (!add_first_prefix_ion)
      {
        i = 2;
      }
      for (; i < peptide.size(); ++i)
      {
        ion = peptide.getPrefix(i);
        DoubleReal pos = ion.getMonoWeight(Residue::CIon, charge) / (DoubleReal)charge;
        ions[pos] = ion;
        names[pos] = "c" + String(i) + String(charge, '+');
      }
      intensity = (DoubleReal)param_.getValue("c_intensity");
      break;
    }

    case Residue::XIon:
    {
      for (Size i = 1; i < peptide.size(); ++i)
      {
        ion = peptide.getSuffix(i);
        DoubleReal pos = ion.getMonoWeight(Residue::XIon, charge) / (DoubleReal)charge;
        ions[pos] = ion;
        names[pos] = "x" + String(i) + String(charge, '+');
      }
      intensity = (DoubleReal)param_.getValue("x_intensity");
      break;
    }

    case Residue::YIon:
    {
      for (Size i = 1; i < peptide.size(); ++i)
      {
        ion = peptide.getSuffix(i);
        DoubleReal pos = ion.getMonoWeight(Residue::YIon, charge) / (DoubleReal)charge;
        ions[pos] = ion;
        names[pos] = "y" + String(i) + String(charge, '+');
      }
      intensity = (DoubleReal)param_.getValue("y_intensity");
      break;
    }

    case Residue::ZIon:
    {
      for (Size i = 1; i < peptide.size(); ++i)
      {
        ion = peptide.getSuffix(i);
        DoubleReal pos = ion.getMonoWeight(Residue::ZIon, charge) / (DoubleReal)charge;
        ions[pos] = ion;
        names[pos] = "z" + String(i) + String(charge, '+');
      }
      intensity = (DoubleReal)param_.getValue("z_intensity");
      break;
    }

    default:
      cerr << "Cannot create peaks of that ion type" << endl;
    }

    // get the params
    bool add_losses(param_.getValue("add_losses").toBool());
    bool add_metainfo(param_.getValue("add_metainfo").toBool());
    bool add_isotopes(param_.getValue("add_isotopes").toBool());
    Int max_isotope((Int)param_.getValue("max_isotope"));
    DoubleReal rel_loss_intensity((DoubleReal)param_.getValue("relative_loss_intensity"));

    for (Map<DoubleReal, AASequence>::ConstIterator cit = ions.begin(); cit != ions.end(); ++cit)
    {
      ion = cit->second;
      DoubleReal pos = cit->first;
      String ion_name = names[pos];
      if (add_isotopes)
      {
        IsotopeDistribution dist = ion.getFormula(res_type, charge).getIsotopeDistribution(max_isotope);
        UInt j(0);
        for (IsotopeDistribution::ConstIterator it = dist.begin(); it != dist.end(); ++it, ++j)
        {
          p_.setMZ((DoubleReal)(pos + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)charge);
          p_.setIntensity(intensity * it->second);
          if (add_metainfo && j == 0)
          {
            p_.setMetaValue("IonName", ion_name);
          }
          spectrum.push_back(p_);
        }
      }
      else
      {
        p_.setMZ(pos);
        p_.setIntensity(intensity);
        if (add_metainfo)
        {
          p_.setMetaValue("IonName", ion_name);
        }
        spectrum.push_back(p_);
      }

      if (add_losses)
      {
        set<String> losses;
        for (AASequence::ConstIterator it = cit->second.begin(); it != cit->second.end(); ++it)
        {
          if (it->hasNeutralLoss())
          {
            vector<EmpiricalFormula> loss_formulas = it->getLossFormulas();
            for (Size i = 0; i != loss_formulas.size(); ++i)
            {
              losses.insert(loss_formulas[i].toString());
            }
          }
        }


        if (!add_isotopes)
        {
          p_.setIntensity(intensity * rel_loss_intensity);
        }

        for (set<String>::const_iterator it = losses.begin(); it != losses.end(); ++it)
        {
          EmpiricalFormula loss_ion = ion.getFormula(res_type, charge) - EmpiricalFormula(*it);
          // thanks to Chris and Sandro
          // check for negative element frequencies (might happen if losses are not allowed for specific ions)
          bool negative_elements(false);
          for (EmpiricalFormula::ConstIterator eit = loss_ion.begin(); eit != loss_ion.end(); ++eit)
          {
            if (eit->second < 0)
            {
              negative_elements = true;
              break;
            }
          }
          if (negative_elements)
          {
            continue;
          }
          DoubleReal loss_pos = loss_ion.getMonoWeight() / (DoubleReal)charge;
          String loss_name = *it;

          if (add_isotopes)
          {
            IsotopeDistribution dist = loss_ion.getIsotopeDistribution(max_isotope);
            UInt j(0);
            for (IsotopeDistribution::ConstIterator iso = dist.begin(); iso != dist.end(); ++iso)
            {
              p_.setMZ((DoubleReal)(loss_pos + j) / (DoubleReal)charge);
              p_.setIntensity(intensity * rel_loss_intensity * iso->second);
              if (add_metainfo && j == 0)
              {
                p_.setMetaValue("IonName", ion_name + "-" + loss_name);
              }
              spectrum.push_back(p_);
            }
          }
          else
          {
            p_.setMZ(loss_pos);
            if (add_metainfo)
            {
              p_.setMetaValue("IonName", ion_name + "-" + loss_name);
            }
            spectrum.push_back(p_);
          }
        }
      }
    }

    if (add_metainfo)
    {
      p_.setMetaValue("IonName", String(""));
    }

    spectrum.sortByPosition();

    return;
  }