예제 #1
0
  void ITRAQLabeler::labelPeptide_(const Feature& feature, SimTypes::FeatureMapSim& result) const
  {
    // modify with iTRAQ modification (needed for mass calculation and MS/MS signal)
    //site="Y" - low abundance
    //site="N-term"
    //site="K" - lysine
    String modification = (itraq_type_ == ItraqConstants::FOURPLEX ? "iTRAQ4plex" : "iTRAQ8plex");
    vector<PeptideHit> pep_hits(feature.getPeptideIdentifications()[0].getHits());
    AASequence seq(pep_hits[0].getSequence());
    // N-term
    seq.setNTerminalModification(modification);
    // all "K":
    for (Size i = 0; i < seq.size(); ++i)
    {
      if (seq[i] == 'K' && !seq[i].isModified())
        seq.setModification(i, modification);
    }
    result.resize(1);
    result[0] = feature;
    pep_hits[0].setSequence(seq);
    result[0].getPeptideIdentifications()[0].setHits(pep_hits);
    // some "Y":
    // for each "Y" create two new features, depending on labeling efficiency on "Y":
    if (y_labeling_efficiency_ == 0)
      return;

    for (Size i = 0; i < seq.size(); ++i)
    {
      if (seq[i] == 'Y' && !seq[i].isModified())
      {
        if (y_labeling_efficiency_ == 1)
        {
          addModificationToPeptideHit_(result.back(), modification, i);
        }
        else // double number of features:
        {
          Size f_count = result.size();
          for (Size f = 0; f < f_count; ++f)
          {
            // copy feature
            result.push_back(result[f]);
            // modify the copy
            addModificationToPeptideHit_(result.back(), modification, i);
            // adjust intensities:
            result.back().setIntensity(result.back().getIntensity() * y_labeling_efficiency_);
            result[f].setIntensity(result[f].getIntensity() * (1 - y_labeling_efficiency_));
          }
        }
      }
    }


  }
  void DigestSimulation::digest(SimTypes::FeatureMapSim& feature_map)
  {
    LOG_INFO << "Digest Simulation ... started" << std::endl;

    if ((String)param_.getValue("enzyme") == String("none"))
    {
      //peptides = proteins;
      // convert all proteins into peptides

      // for each protein_hit in the FeatureMap
      for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
           protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
           ++protein_hit)
      {
        // generate a PeptideHit hit with the correct link to the protein
        PeptideHit pep_hit(1.0, 1, 0, AASequence::fromString(protein_hit->getSequence()));
        PeptideEvidence pe;
        pe.setProteinAccession(protein_hit->getAccession());
        pep_hit.addPeptideEvidence(pe);

        // add the PeptideHit to the PeptideIdentification
        PeptideIdentification pep_id;
        pep_id.insertHit(pep_hit);

        // generate Feature with correct Intensity and corresponding PeptideIdentification
        Feature f;
        f.getPeptideIdentifications().push_back(pep_id);
        f.setIntensity(protein_hit->getMetaValue("intensity"));

        // copy intensity meta-values and additional annotations from Protein to Feature
        StringList keys;
        protein_hit->getKeys(keys);
        for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
        {
          f.setMetaValue(*it_key, protein_hit->getMetaValue(*it_key));
        }

        // add Feature to SimTypes::FeatureMapSim
        feature_map.push_back(f);
      }

      return;
    }


    UInt min_peptide_length = param_.getValue("min_peptide_length");
    bool use_log_model = param_.getValue("model") == "trained" ? true : false;
    UInt missed_cleavages = param_.getValue("model_naive:missed_cleavages");
    double cleave_threshold = param_.getValue("model_trained:threshold");

    EnzymaticDigestion digestion;
    digestion.setEnzyme(digestion.getEnzymeByName((String)param_.getValue("enzyme")));
    digestion.setLogModelEnabled(use_log_model);
    digestion.setLogThreshold(cleave_threshold);

    std::vector<AASequence> digestion_products;

    // keep track of generated features
    std::map<AASequence, Feature> generated_features;

    // Iterate through ProteinHits in the FeatureMap and digest them
    for (std::vector<ProteinHit>::iterator protein_hit = feature_map.getProteinIdentifications()[0].getHits().begin();
         protein_hit != feature_map.getProteinIdentifications()[0].getHits().end();
         ++protein_hit)
    {
      // determine abundance of each digestion product (this is quite long now...)
      // we assume that each digestion product will have the same abundance
      // note: missed cleavages reduce overall abundance as they combine two (or more) single peptides

      // how many "atomic"(i.e. non-cleavable) peptides are created?
      digestion.setMissedCleavages(0);
      Size complete_digest_count = digestion.peptideCount(AASequence::fromString(protein_hit->getSequence()));
      // compute average number of "atomic" peptides summed from all digestion products
      Size number_atomic_whole = 0;
      Size number_of_digestion_products = 0;
      for (Size i = 0; (i <= missed_cleavages) && (i < complete_digest_count); ++i)
      {
        number_atomic_whole += (complete_digest_count - i) * (i + 1);
        number_of_digestion_products += (complete_digest_count - i);
      }

      // mean number of "atomic" peptides per digestion product is now: number_atomic_whole / number_of_digestion_products
      // -> thus abundance of a digestion product is: #proteins / avg#of"atomic"peptides
      // i.e.: protein->second / (number_atomic_whole / number_of_digestion_products)

      Map<String, SimTypes::SimIntensityType> intensities;
      StringList keys;
      protein_hit->getKeys(keys);
      for (StringList::const_iterator it_key = keys.begin(); it_key != keys.end(); ++it_key)
      {
        if (!it_key->hasPrefix("intensity"))
          continue;
        intensities[*it_key] = std::max(SimTypes::SimIntensityType(1), SimTypes::SimIntensityType(protein_hit->getMetaValue(*it_key))
                                        * SimTypes::SimIntensityType(number_of_digestion_products)
                                        / SimTypes::SimIntensityType(number_atomic_whole)); // order changed for numeric stability
      }

      // do real digest
      digestion.setMissedCleavages(missed_cleavages);
      digestion.digest(AASequence::fromString(protein_hit->getSequence()), digestion_products);

      for (std::vector<AASequence>::const_iterator dp_it = digestion_products.begin();
           dp_it != digestion_products.end();
           ++dp_it)
      {
        if (dp_it->size() < min_peptide_length)
          continue;

        // sum equal peptide's intensities
        // *dp_it -> peptide
        // If we see this Peptide the first time -> generate corresponding feature
        if (generated_features.count(*dp_it) == 0)
        {
          PeptideHit pep_hit(1.0, 1, 0, *dp_it);

          PeptideIdentification pep_id;
          pep_id.insertHit(pep_hit);

          // create feature
          Feature f;
          f.getPeptideIdentifications().push_back(pep_id);
          // set intensity to 0 to avoid problems when summing up
          f.setIntensity(0.0);

          // copy all non-intensity meta values
          StringList lkeys;
          protein_hit->getKeys(lkeys);
          for (StringList::iterator key = lkeys.begin(); key != lkeys.end(); ++key)
          {
            if (!key->hasPrefix("intensity"))
            {
              f.setMetaValue(*key, protein_hit->getMetaValue(*key));
            }
          }

          // insert into map
          generated_features.insert(std::make_pair(*dp_it, f));
        }

        // sum up intensity values
        generated_features[*dp_it].setIntensity(generated_features[*dp_it].getIntensity() + intensities["intensity"]);
        // ... same for other intensities (iTRAQ...)
        for (Map<String, SimTypes::SimIntensityType>::const_iterator it_other = intensities.begin(); it_other != intensities.end(); ++it_other)
        {
          if (!generated_features[*dp_it].metaValueExists(it_other->first))
          {
            generated_features[*dp_it].setMetaValue(it_other->first, it_other->second);
          }
          else
          {
            generated_features[*dp_it].setMetaValue(it_other->first, SimTypes::SimIntensityType(generated_features[*dp_it].getMetaValue(it_other->first)) + it_other->second);
          }
        }

        // add current protein accession
        // existing proteins accessions ...
        std::set<String> protein_accessions = generated_features[*dp_it].getPeptideIdentifications()[0].getHits()[0].extractProteinAccessions();

        // ... add accession of current protein
        protein_accessions.insert(protein_hit->getAccession());

        std::vector<PeptideIdentification> pep_idents = generated_features[*dp_it].getPeptideIdentifications();
        std::vector<PeptideHit> pep_hits = pep_idents[0].getHits();

        for (std::set<String>::const_iterator s_it = protein_accessions.begin(); s_it != protein_accessions.end(); ++s_it)
        {
          PeptideEvidence pe;
          pe.setProteinAccession(*s_it);
          pep_hits[0].addPeptideEvidence(pe);
        }
        pep_idents[0].setHits(pep_hits);
        generated_features[*dp_it].setPeptideIdentifications(pep_idents);
      }
    }

    // add generated_features to FeatureMap
    for (std::map<AASequence, Feature>::iterator it_gf = generated_features.begin();
         it_gf != generated_features.end();
         ++it_gf)
    {
      // round up intensity
      (it_gf->second).setIntensity(ceil((it_gf->second).getIntensity()));
      feature_map.push_back(it_gf->second);
    }

  }