// Keeps only those features whose predicted detectability exceeds min_detect_.
// The detectability of every surviving feature is stored as meta value "detectability".
// The prediction itself is delegated to predictDetectabilities(), which is fed the
// unmodified sequence of the first peptide hit of every feature.
void DetectabilitySimulation::svmFilter_(SimTypes::FeatureMapSim& features)
  {
    const Size feature_count = features.size();

    // gather one unmodified peptide string per feature
    vector<String> sequences;
    sequences.reserve(feature_count);
    for (Size idx = 0; idx < feature_count; ++idx)
    {
      sequences.push_back(features[idx].getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString());
    }

    vector<double> labels;
    vector<double> detectabilities;
    predictDetectabilities(sequences, labels, detectabilities);

    // start from a copy of the input map and clear it, so that the map-level
    // meta data stored in the feature map is carried over to the result
    SimTypes::FeatureMapSim filtered(features);
    filtered.clear(false);

    for (Size idx = 0; idx < feature_count; ++idx)
    {
      const double detectability = detectabilities[idx];
      if (detectability > min_detect_)
      {
        features[idx].setMetaValue("detectability", detectability);
        filtered.push_back(features[idx]);
      }
#ifdef DEBUG_SIM
      cout << detectabilities[idx] << " " << min_detect_ << endl;
#endif
    }

    // hand the filtered content back to the caller
    features.swap(filtered);
  }
  // Performs no filtering: every feature is kept and annotated with the
  // meta value "detectability" set to 1.0.
  void DetectabilitySimulation::noFilter_(SimTypes::FeatureMapSim& features)
  {
    const double default_detectability = 1.0;

    const Size feature_count = features.size();
    for (Size idx = 0; idx < feature_count; ++idx)
    {
      features[idx].setMetaValue("detectability", default_detectability);
    }
  }
  // Sets the given label as N-terminal modification on the sequence of every
  // protein hit of the first ProteinIdentification of the feature map.
  // Hits whose N-terminus already carries a modification are left untouched.
  // Does nothing if the map holds no ProteinIdentification.
  void ICPLLabeler::addLabelToProteinHits_(SimTypes::FeatureMapSim& features, const String& label) const
  {
    // without a ProteinIdentification there is nothing to label
    if (features.getProteinIdentifications().empty())
    {
      return;
    }

    std::vector<ProteinHit>& hits = features.getProteinIdentifications()[0].getHits();
    for (Size idx = 0; idx < hits.size(); ++idx)
    {
      ProteinHit& hit = hits[idx];
      AASequence aa = AASequence::fromString(hit.getSequence());

      // an already modified terminus is not accessible for the label
      if (!(aa.getNTerminalModification() == ""))
      {
        continue;
      }

      aa.setNTerminalModification(label);
      hit.setSequence(aa.toString());
    }
  }
Example #4
0
  // Clears the feature map and attaches a single ProteinIdentification to it,
  // holding one ProteinHit per sample protein. Each hit carries the meta values
  // of the sample protein plus "description" and "map_index".
  void MSSim::createFeatureMap_(const SimTypes::SampleProteins& proteins, SimTypes::FeatureMapSim& feature_map, Size map_index)
  {
    // clear feature map
    feature_map.clear(true);

    ProteinIdentification protein_ident;

    const SimTypes::SampleProteins::const_iterator proteins_end = proteins.end();
    for (SimTypes::SampleProteins::const_iterator protein = proteins.begin(); protein != proteins_end; ++protein)
    {
      // new hit built from identifier and sequence of the sample protein
      ProteinHit hit(0.0, 1, protein->entry.identifier, protein->entry.sequence);
      // copy all meta values from FASTA file parsing
      hit = protein->meta;
      // additional meta values
      hit.setMetaValue("description", protein->entry.description);
      hit.setMetaValue("map_index", map_index);

      protein_ident.insertHit(hit);
    }

    // the map receives exactly one ProteinIdentification
    vector<ProteinIdentification> protein_idents(1, protein_ident);
    feature_map.setProteinIdentifications(protein_idents);
  }
Example #5
0
// Applies the SILAC labels to the sequence of every protein hit of the first
// ProteinIdentification of the channel: each 'R' residue receives arginine_label
// and each 'K' residue receives lysine_label as modification.
// Does nothing if the channel holds no ProteinIdentification.
void SILACLabeler::applyLabelToProteinHit_(SimTypes::FeatureMapSim& channel, const String& arginine_label, const String& lysine_label) const
{
    // check if a ProteinIdentification exists before accessing element [0]
    if (channel.getProteinIdentifications().empty())
    {
        return;
    }

    std::vector<ProteinHit>& protein_hits = channel.getProteinIdentifications()[0].getHits();
    for (std::vector<ProteinHit>::iterator protein_hit = protein_hits.begin();
            protein_hit != protein_hits.end();
            ++protein_hit)
    {
        AASequence aa = AASequence::fromString(protein_hit->getSequence());

        // address residues by position so the sequence is not modified through
        // an iterator that is still in use
        for (Size pos = 0; pos < aa.size(); ++pos)
        {
            if (aa[pos] == 'R')
            {
                aa.setModification(pos, arginine_label);
            }
            else if (aa[pos] == 'K')
            {
                aa.setModification(pos, lysine_label);
            }
        }
        protein_hit->setSequence(aa.toString());
    }
}
Example #6
0
  void ITRAQLabeler::labelPeptide_(const Feature& feature, SimTypes::FeatureMapSim& result) const
  {
    // modify with iTRAQ modification (needed for mass calculation and MS/MS signal)
    //site="Y" - low abundance
    //site="N-term"
    //site="K" - lysine
    String modification = (itraq_type_ == ItraqConstants::FOURPLEX ? "iTRAQ4plex" : "iTRAQ8plex");
    vector<PeptideHit> pep_hits(feature.getPeptideIdentifications()[0].getHits());
    AASequence seq(pep_hits[0].getSequence());
    // N-term
    seq.setNTerminalModification(modification);
    // all "K":
    for (Size i = 0; i < seq.size(); ++i)
    {
      if (seq[i] == 'K' && !seq[i].isModified())
        seq.setModification(i, modification);
    }
    result.resize(1);
    result[0] = feature;
    pep_hits[0].setSequence(seq);
    result[0].getPeptideIdentifications()[0].setHits(pep_hits);
    // some "Y":
    // for each "Y" create two new features, depending on labeling efficiency on "Y":
    if (y_labeling_efficiency_ == 0)
      return;

    for (Size i = 0; i < seq.size(); ++i)
    {
      if (seq[i] == 'Y' && !seq[i].isModified())
      {
        if (y_labeling_efficiency_ == 1)
        {
          addModificationToPeptideHit_(result.back(), modification, i);
        }
        else // double number of features:
        {
          Size f_count = result.size();
          for (Size f = 0; f < f_count; ++f)
          {
            // copy feature
            result.push_back(result[f]);
            // modify the copy
            addModificationToPeptideHit_(result.back(), modification, i);
            // adjust intensities:
            result.back().setIntensity(result.back().getIntensity() * y_labeling_efficiency_);
            result[f].setIntensity(result[f].getIntensity() * (1 - y_labeling_efficiency_));
          }
        }
      }
    }


  }
  // Digests the protein hits of the first ProteinIdentification of feature_map
  // into peptide features, which are appended to feature_map.
  //
  // Parameters read from param_:
  //   "enzyme"                       - enzyme name; "none" turns every protein into one feature
  //   "min_peptide_length"           - shorter digestion products are skipped
  //   "model"                        - "trained" enables the log model of the digestion
  //   "model_naive:missed_cleavages" - number of missed cleavages
  //   "model_trained:threshold"      - threshold handed to the log model
  //
  // Identical peptides stemming from several proteins (or several positions) end up in
  // one feature: their intensities are summed and the feature's first peptide hit is
  // linked to each contributing protein accession.
  // Does nothing (besides logging) if feature_map holds no ProteinIdentification.
  void DigestSimulation::digest(SimTypes::FeatureMapSim& feature_map)
  {
    LOG_INFO << "Digest Simulation ... started" << std::endl;

    // check if a ProteinIdentification exists before accessing element [0] below
    if (feature_map.getProteinIdentifications().empty())
    {
      return;
    }

    if ((String)param_.getValue("enzyme") == String("none"))
    {
      //peptides = proteins;
      // convert all proteins into peptides

      // for each protein_hit in the FeatureMap
      for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
           protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
           ++protein_hit)
      {
        // generate a PeptideHit hit with the correct link to the protein
        PeptideHit pep_hit(1.0, 1, 0, AASequence::fromString(protein_hit->getSequence()));
        PeptideEvidence pe;
        pe.setProteinAccession(protein_hit->getAccession());
        pep_hit.addPeptideEvidence(pe);

        // add the PeptideHit to the PeptideIdentification
        PeptideIdentification pep_id;
        pep_id.insertHit(pep_hit);

        // generate Feature with correct Intensity and corresponding PeptideIdentification
        Feature f;
        f.getPeptideIdentifications().push_back(pep_id);
        f.setIntensity(protein_hit->getMetaValue("intensity"));

        // copy intensity meta-values and additional annotations from Protein to Feature
        StringList keys;
        protein_hit->getKeys(keys);
        for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
        {
          f.setMetaValue(*it_key, protein_hit->getMetaValue(*it_key));
        }

        // add Feature to SimTypes::FeatureMapSim
        feature_map.push_back(f);
      }

      return;
    }


    UInt min_peptide_length = param_.getValue("min_peptide_length");
    bool use_log_model = param_.getValue("model") == "trained" ? true : false;
    UInt missed_cleavages = param_.getValue("model_naive:missed_cleavages");
    double cleave_threshold = param_.getValue("model_trained:threshold");

    EnzymaticDigestion digestion;
    digestion.setEnzyme(digestion.getEnzymeByName((String)param_.getValue("enzyme")));
    digestion.setLogModelEnabled(use_log_model);
    digestion.setLogThreshold(cleave_threshold);

    std::vector<AASequence> digestion_products;

    // keep track of generated features
    std::map<AASequence, Feature> generated_features;

    // Iterate through ProteinHits in the FeatureMap and digest them
    for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
         protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
         ++protein_hit)
    {
      // parse the protein sequence once; it is needed for counting and for digesting
      const AASequence protein_sequence = AASequence::fromString(protein_hit->getSequence());

      // determine abundance of each digestion product (this is quite long now...)
      // we assume that each digestion product will have the same abundance
      // note: missed cleavages reduce overall abundance as they combine two (or more) single peptides

      // how many "atomic"(i.e. non-cleavable) peptides are created?
      digestion.setMissedCleavages(0);
      Size complete_digest_count = digestion.peptideCount(protein_sequence);
      // compute average number of "atomic" peptides summed from all digestion products
      Size number_atomic_whole = 0;
      Size number_of_digestion_products = 0;
      for (Size i = 0; (i <= missed_cleavages) && (i < complete_digest_count); ++i)
      {
        number_atomic_whole += (complete_digest_count - i) * (i + 1);
        number_of_digestion_products += (complete_digest_count - i);
      }

      // mean number of "atomic" peptides per digestion product is now: number_atomic_whole / number_of_digestion_products
      // -> thus abundance of a digestion product is: #proteins / avg#of"atomic"peptides
      // i.e.: protein->second / (number_atomic_whole / number_of_digestion_products)

      // meta value keys of the protein; used for the intensities and for annotating new features
      StringList keys;
      protein_hit->getKeys(keys);

      // per-product intensity for every meta value whose key starts with "intensity" (at least 1)
      Map<String, SimTypes::SimIntensityType> intensities;
      for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
      {
        if (!it_key->hasPrefix("intensity"))
          continue;
        intensities[*it_key] = std::max(SimTypes::SimIntensityType(1), SimTypes::SimIntensityType(protein_hit->getMetaValue(*it_key))
                                        * SimTypes::SimIntensityType(number_of_digestion_products)
                                        / SimTypes::SimIntensityType(number_atomic_whole)); // order changed for numeric stability
      }

      // do real digest
      digestion.setMissedCleavages(missed_cleavages);
      digestion.digest(protein_sequence, digestion_products);

      for (std::vector<AASequence>::const_iterator dp_it = digestion_products.begin();
           dp_it != digestion_products.end();
           ++dp_it)
      {
        if (dp_it->size() < min_peptide_length)
          continue;

        // sum equal peptide's intensities
        // *dp_it -> peptide
        // look the peptide up once; all further accesses go through the iterator
        std::map<AASequence, Feature>::iterator gf_it = generated_features.find(*dp_it);

        // If we see this Peptide the first time -> generate corresponding feature
        if (gf_it == generated_features.end())
        {
          PeptideHit pep_hit(1.0, 1, 0, *dp_it);

          PeptideIdentification pep_id;
          pep_id.insertHit(pep_hit);

          // create feature
          Feature f;
          f.getPeptideIdentifications().push_back(pep_id);
          // set intensity to 0 to avoid problems when summing up
          f.setIntensity(0.0);

          // copy all non-intensity meta values
          for (StringList::const_iterator key = keys.begin(); key != keys.end(); ++key)
          {
            if (!key->hasPrefix("intensity"))
            {
              f.setMetaValue(*key, protein_hit->getMetaValue(*key));
            }
          }

          // insert into map
          gf_it = generated_features.insert(std::make_pair(*dp_it, f)).first;
        }

        Feature& feature = gf_it->second;

        // sum up intensity values
        feature.setIntensity(feature.getIntensity() + intensities["intensity"]);
        // ... same for other intensities (iTRAQ...)
        for (Map<String, SimTypes::SimIntensityType>::const_iterator it_other = intensities.begin(); it_other != intensities.end(); ++it_other)
        {
          if (!feature.metaValueExists(it_other->first))
          {
            feature.setMetaValue(it_other->first, it_other->second);
          }
          else
          {
            feature.setMetaValue(it_other->first, SimTypes::SimIntensityType(feature.getMetaValue(it_other->first)) + it_other->second);
          }
        }

        // add current protein accession, but only if the peptide is not linked to it yet;
        // evidence for the already known accessions is part of the hit and must not be
        // added again, otherwise redundant PeptideEvidence entries would pile up each
        // time the peptide is generated
        const std::set<String> known_accessions = feature.getPeptideIdentifications()[0].getHits()[0].extractProteinAccessions();
        if (known_accessions.find(protein_hit->getAccession()) == known_accessions.end())
        {
          std::vector<PeptideIdentification> pep_idents = feature.getPeptideIdentifications();
          std::vector<PeptideHit> pep_hits = pep_idents[0].getHits();

          PeptideEvidence pe;
          pe.setProteinAccession(protein_hit->getAccession());
          pep_hits[0].addPeptideEvidence(pe);

          pep_idents[0].setHits(pep_hits);
          feature.setPeptideIdentifications(pep_idents);
        }
      }
    }

    // add generated_features to FeatureMap
    for (std::map<AASequence, Feature>::iterator it_gf = generated_features.begin();
         it_gf != generated_features.end();
         ++it_gf)
    {
      // round up intensity
      (it_gf->second).setIntensity(ceil((it_gf->second).getIntensity()));
      feature_map.push_back(it_gf->second);
    }

  }