int main()
{
  // construct a AASequence object, query a residue
  // and output some of its properties
  AASequence aas = AASequence::fromString("DECIANGER");
  cout << aas[2].getName() << " "
       << aas[2].getFormula().toString() << " "
       << aas[2].getModificationName() << " "
       << aas[2].getMonoWeight() << endl;
  
  // find a modification in ModificationsDB
  // and output some of its properties
  // getInstance() returns a pointer to a ModsDB instance
  ResidueModification mod = ModificationsDB::getInstance()->getModification("Carbamidomethyl (C)");
  cout << mod.getOrigin() << " "
       << mod.getFullId() << " "
       << mod.getDiffMonoMass() << " "
       << mod.getMonoMass() << endl;
  
  // set the modification on a residue of a peptide
  // and output some of its properties (the formula and mass have changed)
  // in this case ModificationsDB is used in the background
  // to relate the name of the mod to its attributes
  aas.setModification(2, "Carbamidomethyl (C)");
  cout << aas[2].getName() << " "
   	<< aas[2].getFormula().toString() << " "
   	<< aas[2].getModificationName() << " "
   	<< aas[2].getMonoWeight() << endl;

  return 0;
} //end of main
示例#2
0
  void OpenSwathDataAccessHelper::convertTargetedPeptide(const TargetedExperiment::Peptide& pep, OpenSwath::LightPeptide & p)
  {
    OpenSwath::LightModification m;
    OpenMS::ModificationsDB* mod_db = OpenMS::ModificationsDB::getInstance();

    p.id = pep.id;
    if (!pep.rts.empty())
    {
      p.rt = pep.rts[0].getCVTerms()["MS:1000896"][0].getValue().toString().toDouble();
    }
    p.charge = pep.getChargeState();
    p.sequence = pep.sequence;
    p.peptide_group_label = pep.getPeptideGroupLabel();

    p.protein_refs.clear();
    if (!pep.protein_refs.empty())
    {
      p.protein_refs.insert( p.protein_refs.begin(), pep.protein_refs.begin(), pep.protein_refs.end() ); 
    }

    // Mapping of peptide modifications
    {
      OpenMS::AASequence aa_sequence = TargetedExperimentHelper::getAASequence(pep);
      if ( !aa_sequence.getNTerminalModification().empty())
      {
          ResidueModification rmod = mod_db->getTerminalModification(aa_sequence.getNTerminalModification(), ResidueModification::N_TERM);
          m.location = -1;
          m.unimod_id = rmod.getUniModAccession();
          p.modifications.push_back(m);
      }
      if ( !aa_sequence.getCTerminalModification().empty())
      {
          ResidueModification rmod = mod_db->getTerminalModification(aa_sequence.getCTerminalModification(), ResidueModification::C_TERM);
          m.location = boost::numeric_cast<int>(aa_sequence.size());
          m.unimod_id = rmod.getUniModAccession();
          p.modifications.push_back(m);
      }
      for (Size i = 0; i != aa_sequence.size(); i++)
      {
        if (aa_sequence[i].isModified())
        {
          // search the residue in the modification database (if the sequence is valid, we should find it)
          ResidueModification rmod = mod_db->getModification(aa_sequence.getResidue(i).getOneLetterCode(),
                                                             aa_sequence.getResidue(i).getModification(), ResidueModification::ANYWHERE);
          m.location = boost::numeric_cast<int>(i);
          m.unimod_id = rmod.getUniModAccession();
          p.modifications.push_back(m);

        }
      }

    }
    // transition_exp.peptides.push_back(p);
  }
START_SECTION((double getNeutralLossMonoMass() const))
	NOT_TESTABLE
END_SECTION

START_SECTION((void setNeutralLossAverageMass(double average_mass)))
	ptr->setNeutralLossAverageMass(23.345678);
	TEST_REAL_SIMILAR(ptr->getNeutralLossAverageMass(), 23.345678)
END_SECTION

START_SECTION(double getNeutralLossAverageMass() const)
	NOT_TESTABLE
END_SECTION

START_SECTION((bool hasNeutralLoss() const))
	TEST_EQUAL(ptr->hasNeutralLoss(), true)
	ResidueModification mod;
	TEST_EQUAL(mod.hasNeutralLoss(), false)
	mod.setNeutralLossDiffFormula(EmpiricalFormula("H2O"));
	TEST_EQUAL(mod.hasNeutralLoss(), true)
END_SECTION

START_SECTION((void setFullId(const String& full_id)))
	ptr->setFullId("blubb_new_fullid");
	TEST_STRING_EQUAL(ptr->getFullId(), "blubb_new_fullid")
END_SECTION

START_SECTION((const String& getFullId() const))
	NOT_TESTABLE
END_SECTION

START_SECTION((void setUniModAccession(const String &id)))
  void MassDecompositionAlgorithm::updateMembers_()
  {
    // todo add accessor to tolerance, it is called very often in CID mode

    Map<char, double> aa_to_weight;

    set<const Residue *> residues = ResidueDB::getInstance()->getResidues((String)param_.getValue("residue_set"));

    for (set<const Residue *>::const_iterator it = residues.begin(); it != residues.end(); ++it)
    {
      aa_to_weight[(*it)->getOneLetterCode()[0]] = (*it)->getMonoWeight(Residue::Internal);
    }

    // now handle the modifications
    ModificationDefinitionsSet mod_set(param_.getValue("fixed_modifications"), param_.getValue("variable_modifications"));
    set<ModificationDefinition> fixed_mods = mod_set.getFixedModifications();
    for (set<ModificationDefinition>::const_iterator it = fixed_mods.begin(); it != fixed_mods.end(); ++it)
    {
      const ResidueModification& mod = it->getModification();
      char aa = ' ';
      if (mod.getOrigin() == 'X')
      {
        cerr << "MassDecompositionAlgorithm: Warning: cannot handle modification " << mod.getName() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl;
        continue;
      }
      else
      {
        aa = mod.getOrigin();
      }

      if (mod.getMonoMass() != 0)
      {
        aa_to_weight[aa] = mod.getMonoMass();
      }
      else
      {
        if (mod.getDiffMonoMass() != 0)
        {
          aa_to_weight[aa] += mod.getDiffMonoMass();
        }
        else
        {
          cerr << "MassDecompositionAlgorithm: Warning: cannot handle modification " << mod.getName() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl;
          continue;
        }
      }
    }

    const StringList mod_names(ListUtils::create<String>("a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z"));
    vector<String>::const_iterator actual_mod_name = mod_names.begin();
    set<ModificationDefinition> var_mods = mod_set.getVariableModifications();
    for (set<ModificationDefinition>::const_iterator it = var_mods.begin(); it != var_mods.end(); ++it)
    {
      ResidueModification mod = it->getModification();
      //cerr << it->getModification() << " " << mod.getOrigin() << " " << mod.getId() << " " << mod.getFullId() << " " << mod.getUniModAccession() << " " << mod.getPSIMODAccession() << endl;
      char aa = (*actual_mod_name)[0];
      char origin_aa = ' ';
      ++actual_mod_name;

      if (mod.getOrigin() == 'X')
      {
        cerr << "MassDecompositionAlgorithm: Warning: cannot handle modification " << mod.getName() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl;
        continue;
      }
      else
      {
        origin_aa = mod.getOrigin();
      }

      if (mod.getMonoMass() != 0)
      {
        aa_to_weight[aa] = mod.getMonoMass();
      }
      else
      {
        if (mod.getDiffMonoMass() != 0)
        {
          aa_to_weight[aa] = aa_to_weight[origin_aa] + mod.getDiffMonoMass();
        }
        else
        {
          cerr << "Warning: cannot handle modification " << mod.getName() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl;
          continue;
        }
      }
    }

    if (alphabet_ != nullptr)
    {
      delete alphabet_;
    }
    if (decomposer_ != nullptr)
    {
      delete decomposer_;
    }

    // init mass decomposer
    alphabet_ = new ims::IMSAlphabet();
    for (Map<char, double>::ConstIterator it = aa_to_weight.begin(); it != aa_to_weight.end(); ++it)
    {
      alphabet_->push_back(String(it->first), it->second);
    }

    // initializes weights
    ims::Weights weights(alphabet_->getMasses(), (double) param_.getValue("decomp_weights_precision"));

    // optimize alphabet by dividing by gcd
    weights.divideByGCD();

    // decomposes real values
    decomposer_ = new ims::RealMassDecomposer(weights);

    return;
  }
示例#5
0
  /// returns false on failure
  void translateModifications(StringList& static_mod_list, StringList& variable_mod_list)
  {
    // translating UNIMOD notation to MyriMatch notation of PTMs.
    ModificationDefinitionsSet mod_set(getStringList_("fixed_modifications"), getStringList_("variable_modifications"));
    if (!getStringList_("fixed_modifications").empty())
    {
      set<String> mod_names = mod_set.getFixedModificationNames();
      for (set<String>::const_iterator it = mod_names.begin(); it != mod_names.end(); ++it)
      {
        ResidueModification mod = ModificationsDB::getInstance()->getModification(*it);
        String origin = String(mod.getOrigin());
        String mass_diff = String(mod.getDiffMonoMass());
        if (origin == "N-term")
        {
          origin = "(";
        }
        else if (origin == "C-term")
        {
          origin = ")";
        }
        else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "N-term")
        {
          origin = "(" + origin;
        }
        else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "C-term")
        {
          origin = ")" + origin;
        }
        static_mod_list.push_back(origin + " " + mod.getDiffMonoMass());
      }
    }

    if (!getStringList_("variable_modifications").empty())
    {
      set<String> mod_names = mod_set.getVariableModificationNames();

      for (set<String>::const_iterator it = mod_names.begin(); it != mod_names.end(); ++it)
      {
        ResidueModification mod = ModificationsDB::getInstance()->getModification(*it);
        String origin = String(mod.getOrigin());
        String mass_diff = String(mod.getDiffMonoMass());
        if (origin == "N-term")
        {
          origin = "(";
        }
        else if (origin == "C-term")
        {
          origin = ")";
        }
        else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "N-term")
        {
          origin = "(" + origin;
        }
        else if (mod.getTermSpecificityName(mod.getTermSpecificity()) == "C-term")
        {
          origin = ")" + origin;
        }
        variable_mod_list.push_back(origin + " * " + mass_diff); // use * for all mods (no unique-per-mod symbol should be required)
      }
    }
  }
示例#6
0
  void createParamFile_(ostream& os)
  {
    os << "# comet_version " << getStringOption_("comet_version") << "\n";               //required as first line in the param file
    os << "# Comet MS/MS search engine parameters file.\n";
    os << "# Everything following the '#' symbol is treated as a comment.\n";
    os << "database_name = " << getStringOption_("database") << "\n";
    os << "decoy_search = " << 0 << "\n"; // 0=no (default), 1=concatenated search, 2=separate search
    os << "num_threads = " << getIntOption_("threads") << "\n";  // 0=poll CPU to set num threads; else specify num threads directly (max 64)

    // masses
    map<String,int> precursor_error_units;
    precursor_error_units["amu"] = 0;
    precursor_error_units["mmu"] = 1;
    precursor_error_units["ppm"] = 2;

    map<string,int> isotope_error;
    isotope_error["off"] = 0;
    isotope_error["-1/0/1/2/3"] = 1;
    isotope_error["-8/-4/0/4/8"] = 2;

    os << "peptide_mass_tolerance = " << getDoubleOption_("precursor_mass_tolerance") << "\n";
    os << "peptide_mass_units = " << precursor_error_units[getStringOption_("precursor_error_units")] << "\n";                  // 0=amu, 1=mmu, 2=ppm
    os << "mass_type_parent = " << 1 << "\n";                    // 0=average masses, 1=monoisotopic masses
    os << "mass_type_fragment = " << 1 << "\n";                  // 0=average masses, 1=monoisotopic masses
    os << "precursor_tolerance_type = " << 0 << "\n";            // 0=MH+ (default), 1=precursor m/z; only valid for amu/mmu tolerances
    os << "isotope_error = " << isotope_error[getStringOption_("isotope_error")] << "\n";                      // 0=off, 1=on -1/0/1/2/3 (standard C13 error), 2= -8/-4/0/4/8 (for +4/+8 labeling)

    // search enzyme

    String enzyme_name = getStringOption_("enzyme");
    String enzyme_number = String(ProteaseDB::getInstance()->getEnzyme(enzyme_name)->getCometID());

    map<string,int> num_enzyme_termini;
    num_enzyme_termini["semi"] = 1;
    num_enzyme_termini["fully"] = 2;
    num_enzyme_termini["C-term unspecific"] = 8;
    num_enzyme_termini["N-term unspecific"] = 9;

    os << "search_enzyme_number = " << enzyme_number << "\n";                // choose from list at end of this params file
    os << "num_enzyme_termini = " << num_enzyme_termini[getStringOption_("num_enzyme_termini")] << "\n";                  // 1 (semi-digested), 2 (fully digested, default), 8 C-term unspecific , 9 N-term unspecific
    os << "allowed_missed_cleavage = " << getIntOption_("allowed_missed_cleavages") << "\n";             // maximum value is 5; for enzyme search

    // Up to 9 variable modifications are supported
    // format:  <mass> <residues> <0=variable/else binary> <max_mods_per_peptide> <term_distance> <n/c-term> <required>
    //     e.g. 79.966331 STY 0 3 -1 0 0
    vector<String> variable_modifications_names = getStringList_("variable_modifications");
    vector<ResidueModification> variable_modifications = getModifications_(variable_modifications_names);
    if (variable_modifications.size() > 9)
    {
      throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Comet only supports 9 variable modifications. " + String(variable_modifications.size()) + " provided.");
    }

    int max_variable_mods_in_peptide = getIntOption_("max_variable_mods_in_peptide");
    Size var_mod_index = 0;

    // write out user specified modifications
    for (; var_mod_index < variable_modifications.size(); ++var_mod_index)
    {
      const ResidueModification mod = variable_modifications[var_mod_index];
      double mass = mod.getDiffMonoMass();
      String residues = mod.getOrigin();
      //TODO support binary groups, e.g. for SILAC
      int binary_group = 0;
      //TODO support mod-specific limit (default for now is the overall max per peptide)
      int max_current_mod_per_peptide = max_variable_mods_in_peptide;
      //TODO support term-distances?
      int term_distance = -1;
      int nc_term = 0;

      //TODO support agglomeration of Modifications to same AA. Watch out for nc_term value then.
      if (mod.getTermSpecificity() == ResidueModification::C_TERM)
      {
        residues = "c";
        term_distance = 0;
        // Since users need to specify mods that apply to multiple residues/terms separately
        // 3 and -1 should be equal for now.
        nc_term = 3;
      }
      else if (mod.getTermSpecificity() == ResidueModification::N_TERM)
      {
        residues = "n";
        term_distance = 0;
        // Since users need to specify mods that apply to multiple residues/terms separately
        // 2 and -1 should be equal for now.
        nc_term = 2;
      }
      else if (mod.getTermSpecificity() == ResidueModification::PROTEIN_N_TERM) // not yet available
      {
        term_distance = 0;
        nc_term = 0;
      }
      else if (mod.getTermSpecificity() == ResidueModification::PROTEIN_C_TERM) // not yet available
      {
        term_distance = 0;
        nc_term = 1;
      }

      //TODO support required variable mods
      bool required = false;

      os << "variable_mod0" << var_mod_index+1 << " = " << mass << " " << residues << " " << binary_group << " " << max_current_mod_per_peptide << " " << term_distance << " " << nc_term << " " << required << "\n";
    }

    // fill remaining modification slots (if any) in Comet with "no modification"
    for (; var_mod_index < 9; ++var_mod_index)
    {
      os << "variable_mod0" << var_mod_index+1 << " = " << "0.0 X 0 3 -1 0 0" << "\n";
    }

    os << "max_variable_mods_in_peptide = " << getIntOption_("max_variable_mods_in_peptide") << "\n";
    os << "require_variable_mod = " << (int) (getStringOption_("require_variable_mod") == "true") << "\n";

    // fragment ion defaults
    // ion trap ms/ms:  1.0005 tolerance, 0.4 offset (mono masses), theoretical_fragment_ions = 1
    // high res ms/ms:    0.02 tolerance, 0.0 offset (mono masses), theoretical_fragment_ions = 0

    String instrument = getStringOption_("instrument");
    double bin_tol = getDoubleOption_("fragment_bin_tolerance");
    double bin_offset = getDoubleOption_("fragment_bin_offset");
    if (instrument == "low_res" && (bin_tol < 0.9 || bin_offset <= 0.2))
    {
      LOG_WARN << "Fragment bin size or tolerance is quite low for low res instruments." << "\n";
    }
    else if (instrument == "high_res" && (bin_tol > 0.2 || bin_offset > 0.1))
    {
      LOG_WARN << "Fragment bin size or tolerance is quite high for high res instruments." << "\n";
    };

    os << "fragment_bin_tol = " << bin_tol << "\n";               // binning to use on fragment ions
    os << "fragment_bin_offset = " << bin_offset  << "\n";              // offset position to start the binning (0.0 to 1.0)
    os << "theoretical_fragment_ions = " << (int)(instrument == "low_res") << "\n";           // 0=use flanking bin, 1=use M bin only
    os << "use_A_ions = " << (int)(getStringOption_("use_A_ions")=="true") << "\n";
    os << "use_B_ions = " << (int)(getStringOption_("use_B_ions")=="true") << "\n";
    os << "use_C_ions = " << (int)(getStringOption_("use_C_ions")=="true") << "\n";
    os << "use_X_ions = " << (int)(getStringOption_("use_X_ions")=="true") << "\n";
    os << "use_Y_ions = " << (int)(getStringOption_("use_Y_ions")=="true") << "\n";
    os << "use_Z_ions = " << (int)(getStringOption_("use_Z_ions")=="true") << "\n";
    os << "use_NL_ions = " << (int)(getStringOption_("use_NL_ions")=="true") << "\n";                         // 0=no, 1=yes to consider NH3/H2O neutral loss peaks

    // output
    os << "output_sqtstream = " << 0 << "\n";                    // 0=no, 1=yes  write sqt to standard output
    os << "output_sqtfile = " << 0 << "\n";                      // 0=no, 1=yes  write sqt file
    os << "output_txtfile = " << 0 << "\n";                     // 0=no, 1=yes  write tab-delimited txt file
    os << "output_pepxmlfile = " << 1 << "\n";                   // 0=no, 1=yes  write pep.xml file

    os << "output_percolatorfile = " << !getStringOption_("pin_out").empty() << "\n";              // 0=no, 1=yes  write Percolator tab-delimited input file
    os << "output_outfiles = " <<  0 << "\n";                    // 0=no, 1=yes  write .out files
    os << "print_expect_score = " << 1 << "\n";                  // 0=no, 1=yes to replace Sp with expect in out & sqt
    os << "num_output_lines = " << getIntOption_("num_hits") << "\n";                    // num peptide results to show
    os << "show_fragment_ions = " << 0 << "\n";                  // 0=no, 1=yes for out files only
    os << "sample_enzyme_number = " << enzyme_number << "\n";                // Sample enzyme which is possibly different than the one applied to the search.

    // mzXML parameters
    map<string,int> override_charge;
    override_charge["keep any known"] = 0;
    override_charge["ignore known"] = 1;
    override_charge["ignore outside range"] = 2;
    override_charge["keep known search unknown"] = 3;

    int precursor_charge_min(0), precursor_charge_max(0);
    if (!parseRange_(getStringOption_("precursor_charge"), precursor_charge_min, precursor_charge_max))
    {
      LOG_INFO << "precursor_charge range not set. Defaulting to 0:0 (disable charge filtering)." << endl;
    }

    os << "scan_range = " << "0 0" << "\n";                        // start and scan scan range to search; 0 as 1st entry ignores parameter
    os << "precursor_charge = " << precursor_charge_min << " " << precursor_charge_max << "\n";                  // precursor charge range to analyze; does not override any existing charge; 0 as 1st entry ignores parameter
    os << "override_charge = " << override_charge[getStringOption_("override_charge")] << "\n";                     // 0=no, 1=override precursor charge states, 2=ignore precursor charges outside precursor_charge range, 3=see online
    os << "ms_level = " << getIntOption_("ms_level") << "\n";                            // MS level to analyze, valid are levels 2 (default) or 3
    os << "activation_method = " << getStringOption_("activation_method") << "\n";                 // activation method; used if activation method set; allowed ALL, CID, ECD, ETD, PQD, HCD, IRMPD

    // misc parameters
    double digest_mass_range_min(600.0), digest_mass_range_max(5000.0);
    if (!parseRange_(getStringOption_("digest_mass_range"), digest_mass_range_min, digest_mass_range_max))
    {
      LOG_INFO << "digest_mass_range not set. Defaulting to 600.0 5000.0." << endl;
    }

    os << "digest_mass_range = " << digest_mass_range_min << " " << digest_mass_range_max << "\n";        // MH+ peptide mass range to analyze
    os << "num_results = " << 100 << "\n";                       // number of search hits to store internally
    os << "skip_researching = " << 1 << "\n";                    // for '.out' file output only, 0=search everything again (default), 1=don't search if .out exists
    os << "max_fragment_charge = " << getIntOption_("max_fragment_charge") << "\n";                 // set maximum fragment charge state to analyze (allowed max 5)
    os << "max_precursor_charge = " << getIntOption_("max_precursor_charge") << "\n";                // set maximum precursor charge state to analyze (allowed max 9)
    os << "nucleotide_reading_frame = " << 0 << "\n";            // 0=proteinDB, 1-6, 7=forward three, 8=reverse three, 9=all six
    os << "clip_nterm_methionine = " << (int)(getStringOption_("clip_nterm_methionine")=="true") << "\n";              // 0=leave sequences as-is; 1=also consider sequence w/o N-term methionine
    os << "spectrum_batch_size = " << getIntOption_("spectrum_batch_size") << "\n";                 // max. // of spectra to search at a time; 0 to search the entire scan range in one loop
    os << "decoy_prefix = " << "--decoysearch-not-used--" << "\n";                 // decoy entries are denoted by this string which is pre-pended to each protein accession
    os << "output_suffix = " << "" << "\n";                      // add a suffix to output base names i.e. suffix "-C" generates base-C.pep.xml from base.mzXML input
    os << "mass_offsets = " << ListUtils::concatenate(getDoubleList_("mass_offsets"), " ") << "\n"; // one or more mass offsets to search (values subtracted from deconvoluted precursor mass)

    // spectral processing
    map<string,int> remove_precursor_peak;
    remove_precursor_peak["no"] = 0;
    remove_precursor_peak["yes"] = 1;
    remove_precursor_peak["charge_reduced"] = 2;
    remove_precursor_peak["phosphate_loss"] = 3;

    double clear_mz_range_min(0.0), clear_mz_range_max(0.0);
    if (!parseRange_(getStringOption_("clear_mz_range"), clear_mz_range_min, clear_mz_range_max))
    {
      LOG_INFO << "clear_mz_range not set. Defaulting to 0:0 (disable m/z filter)." << endl;
    }

    os << "minimum_peaks = " << getIntOption_("minimum_peaks") << "\n";                      // required minimum number of peaks in spectrum to search (default 10)
    os << "minimum_intensity = " << getDoubleOption_("minimum_intensity") << "\n";                   // minimum intensity value to read in
    os << "remove_precursor_peak = " << remove_precursor_peak[getStringOption_("remove_precursor_peak")] << "\n";               // 0=no, 1=yes, 2=all charge reduced precursor peaks (for ETD)
    os << "remove_precursor_tolerance = " << getDoubleOption_("remove_precursor_tolerance") << "\n";        // +- Da tolerance for precursor removal
    os << "clear_mz_range = " << clear_mz_range_min << " " << clear_mz_range_max << "\n";                // for iTRAQ/TMT type data; will clear out all peaks in the specified m/z range


    // write fixed modifications - if not specified residue parameter is zero
    // Aminoacid:
    //      add_AA.OneletterCode_AA.ThreeLetterCode = xxx
    // Terminus:
    //      add_N/Cterm_peptide = xxx       protein not available yet
    vector<String> fixed_modifications_names = getStringList_("fixed_modifications");
    vector<ResidueModification> fixed_modifications = getModifications_(fixed_modifications_names);
    // Comet sets Carbamidometyl (C) as modification as default even if not specified
    // Therefor there is the need to set it to 0 if not set as flag
    if (fixed_modifications.empty())
    {
      os << "add_C_cysteine = 0.0000" << endl;
    }
    else
    {
      for (vector<ResidueModification>::const_iterator it = fixed_modifications.begin(); it != fixed_modifications.end(); ++it)
      {
        String AA = it->getOrigin();
        if ((AA!="N-term") && (AA!="C-term"))
        {
          const Residue* r = ResidueDB::getInstance()->getResidue(AA);
          String name = r->getName();
          os << "add_" << r->getOneLetterCode() << "_" << name.toLower() << " = " << it->getDiffMonoMass() << endl;
        }
        else
        {
          os << "add_" << AA.erase(1,1) << "_peptide = " << it->getDiffMonoMass() << endl;
        }
      }
    }

    //TODO register cut_before and cut_after in Enzymes.xml plus datastructures to add all our Enzymes with our names instead.
    // COMET_ENZYME_INFO _must_ be at the end of this parameters file
    os << "[COMET_ENZYME_INFO]" << "\n";
    os << "0.  No_enzyme              0      -           -" << "\n";
    os << "1.  Trypsin                1      KR          P" << "\n";
    os << "2.  Trypsin/P              1      KR          -" << "\n";
    os << "3.  Lys_C                  1      K           P" << "\n";
    os << "4.  Lys_N                  0      K           -" << "\n";
    os << "5.  Arg_C                  1      R           P" << "\n";
    os << "6.  Asp_N                  0      D           -" << "\n";
    os << "7.  CNBr                   1      M           -" << "\n";
    os << "8.  Glu_C                  1      DE          P" << "\n";
    os << "9.  PepsinA                1      FL          P" << "\n";
    os << "10. Chymotrypsin           1      FWYL        P" << "\n";
  }
示例#7
0
  void Residue::setModification(const String & modification)
  {
    //modification_ = modification;

    ModificationsDB * mod_db = ModificationsDB::getInstance();
    ResidueModification mod = mod_db->getModification(one_letter_code_, modification, ResidueModification::ANYWHERE);

    modification_ = mod.getId();
    // update all the members
    if (mod.getAverageMass() != 0)
    {
      average_weight_ = mod.getAverageMass();
    }
    if (mod.getMonoMass() != 0)
    {
      mono_weight_ = mod.getMonoMass();
    }

    bool updated_formula(false);
    if (!mod.getDiffFormula().isEmpty())
    {
      updated_formula = true;
      setFormula(getFormula() + mod.getDiffFormula());
    }
    if (mod.getFormula() != "" && !updated_formula)
    {
      updated_formula = true;
      String formula = mod.getFormula();
      formula.removeWhitespaces();
      formula_ = EmpiricalFormula(formula);
    }

    if (updated_formula)
    {
      average_weight_ = formula_.getAverageWeight();
      mono_weight_ = formula_.getMonoWeight();
    }
    else
    {
      if (mod.getAverageMass() != 0)
      {
        average_weight_ = mod.getAverageMass();
      }
      if (mod.getMonoMass() != 0)
      {
        mono_weight_ = mod.getMonoMass();
      }
    }

    // neutral losses
    loss_formulas_.clear();
    loss_names_.clear();
    if (mod.hasNeutralLoss())
    {
      loss_formulas_.push_back(mod.getNeutralLossDiffFormula());
      loss_names_.push_back(mod.getNeutralLossDiffFormula().toString());
    }

    is_modified_ = true;
  }
  void    CompNovoIdentificationBase::updateMembers_()
  {
    // init residue mass table
    String residue_set(param_.getValue("residue_set"));

    set<const Residue *> residues = ResidueDB::getInstance()->getResidues(residue_set);
    for (set<const Residue *>::const_iterator it = residues.begin(); it != residues.end(); ++it)
    {
      aa_to_weight_[(*it)->getOneLetterCode()[0]] = (*it)->getMonoWeight(Residue::Internal);
    }

    max_number_aa_per_decomp_ = param_.getValue("max_number_aa_per_decomp");
    tryptic_only_ = param_.getValue("tryptic_only").toBool();
    fragment_mass_tolerance_ = (DoubleReal)param_.getValue("fragment_mass_tolerance");
    max_number_pivot_ = param_.getValue("max_number_pivot");
    decomp_weights_precision_ = (DoubleReal)param_.getValue("decomp_weights_precision");
    min_mz_ = (DoubleReal)param_.getValue("min_mz");
    max_mz_ = (DoubleReal)param_.getValue("max_mz");
    max_decomp_weight_ = (DoubleReal)param_.getValue("max_decomp_weight");
    max_subscore_number_ = param_.getValue("max_subscore_number");
    max_isotope_ = param_.getValue("max_isotope");

    name_to_residue_.clear();
    residue_to_name_.clear();

    // now handle the modifications
    ModificationDefinitionsSet mod_set((StringList)param_.getValue("fixed_modifications"), (StringList)param_.getValue("variable_modifications"));
    set<ModificationDefinition> fixed_mods = mod_set.getFixedModifications();

    for (set<ModificationDefinition>::const_iterator it = fixed_mods.begin(); it != fixed_mods.end(); ++it)
    {
      ResidueModification mod = ModificationsDB::getInstance()->getModification(it->getModification());
      char aa = ' ';
      if (mod.getOrigin().size() != 1 || mod.getOrigin() == "X")
      {
        cerr << "Warning: cannot handle modification " << it->getModification() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl;
        continue;
      }
      else
      {
        aa = mod.getOrigin()[0];
      }

      if (mod.getMonoMass() != 0)
      {
        aa_to_weight_[aa] = mod.getMonoMass();
      }
      else
      {
        if (mod.getDiffMonoMass() != 0)
        {
          aa_to_weight_[aa] += mod.getDiffMonoMass();
        }
        else
        {
          cerr << "Warning: cannot handle modification " << it->getModification() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl;
          continue;
        }
      }

      //cerr << "Setting fixed modification " << it->getModification() << " of amino acid '" << aa << "'; weight = " << aa_to_weight_[aa] << endl;

      const Residue * res = ResidueDB::getInstance()->getModifiedResidue(it->getModification());
      name_to_residue_[aa] = res;
      residue_to_name_[res] = aa;
    }

    const StringList mod_names(StringList::create("a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z"));
    vector<String>::const_iterator actual_mod_name = mod_names.begin();
    set<ModificationDefinition> var_mods = mod_set.getVariableModifications();
    for (set<ModificationDefinition>::const_iterator it = var_mods.begin(); it != var_mods.end(); ++it)
    {
      ResidueModification mod = ModificationsDB::getInstance()->getModification(it->getModification());
      char aa = (*actual_mod_name)[0];
      char origin_aa = ' ';
      ++actual_mod_name;

      if (mod.getOrigin().size() != 1 || mod.getOrigin() == "X")
      {
        cerr << "CompNovoIdentificationBase: Warning: cannot handle modification " << it->getModification() << ", because aa is ambiguous (" << mod.getOrigin() << "), ignoring modification!" << endl;
        continue;
      }
      else
      {
        origin_aa = mod.getOrigin()[0];
      }

      if (mod.getMonoMass() != 0)
      {
        aa_to_weight_[aa] = mod.getMonoMass();
      }
      else
      {
        if (mod.getDiffMonoMass() != 0)
        {
          aa_to_weight_[aa] = aa_to_weight_[origin_aa] + mod.getDiffMonoMass();
        }
        else
        {
          cerr << "CompNovoIdentificationBase: Warning: cannot handle modification " << it->getModification() << ", because no monoisotopic mass value was found! Ignoring modification!" << endl;
          continue;
        }
      }

      //cerr << "Mapping variable modification " << it->getModification() << " to letter '" << aa << "' (@" << origin_aa << "); weight = " << aa_to_weight_[aa] << endl;
      const Residue * res = ResidueDB::getInstance()->getModifiedResidue(it->getModification());
      name_to_residue_[aa] = res;
      residue_to_name_[res] = aa;
    }

    /*
    cerr << "Following masses are used for identification: " << endl;

    for (Map<char, DoubleReal>::const_iterator it = aa_to_weight_.begin(); it != aa_to_weight_.end(); ++it)
    {
        cerr << it->first << " " << precisionWrapper(it->second) << endl;
    }*/

    initIsotopeDistributions_();

    Param decomp_param(mass_decomp_algorithm_.getParameters());
    decomp_param.setValue("tolerance", fragment_mass_tolerance_);
    decomp_param.setValue("fixed_modifications", (StringList)param_.getValue("fixed_modifications"));
    decomp_param.setValue("variable_modifications", (StringList)param_.getValue("variable_modifications"));
    mass_decomp_algorithm_.setParameters(decomp_param);

    min_aa_weight_ = numeric_limits<DoubleReal>::max();
    for (Map<char, DoubleReal>::const_iterator it = aa_to_weight_.begin(); it != aa_to_weight_.end(); ++it)
    {
      if (min_aa_weight_ > it->second)
      {
        min_aa_weight_ = it->second;
      }
    }
    return;
  }