Ejemplo n.º 1
0
 // Collapse all input channels into one feature map, which becomes the only
 // element of 'features'. No further influence on the simulation process is needed.
 void LabelFreeLabeler::setUpHook(SimTypes::FeatureMapSimVector& features)
 {
   // exactly one channel: nothing to merge
   if (features.size() != 1)
   {
     LOG_INFO << "Merging input FASTA files into one. Intensities will be summed up if duplicates occur.";

     // build the merged map first, then replace the channel list by it
     SimTypes::FeatureMapSim merged_channel = mergeProteinIdentificationsMaps_(features);
     features.clear();
     features.push_back(merged_channel);
   }
 }
// Passes every feature map of 'feature_maps' to DigestSimulation::digest(),
// using the "naive" model with zero missed cleavages. The parameters of the
// digestion object are printed to stdout before digesting.
void digestFeaturesMapSimVector_(SimTypes::FeatureMapSimVector& feature_maps)
{
  DigestSimulation digester;

  // digestion settings used by the tests
  Param digest_params;
  digest_params.setValue("model", "naive");
  digest_params.setValue("model_naive:missed_cleavages", 0);
  digester.setParameters(digest_params);

  std::cout << digester.getParameters() << std::endl;

  // digest channel by channel
  for (SimTypes::FeatureMapSimVector::size_type channel = 0; channel < feature_maps.size(); ++channel)
  {
    digester.digest(feature_maps[channel]);
  }
}
Ejemplo n.º 3
0
  /// Labeling between digestion and RT simulation.
  ///
  /// Every peptide feature of every channel is passed through labelPeptide_();
  /// the resulting features are joined by their sequence string into one feature
  /// of a single map, which replaces the content of @p channels.
  /// The per-channel intensities are retained as meta values (named via getChannelIntensityName()).
  /// If a peptide is not present in all channels, then there will be missing meta values
  /// (so don't rely on them being present)!
  ///
  /// @param channels In: one feature map per channel. Out: exactly one merged feature map.
  void ITRAQLabeler::postDigestHook(SimTypes::FeatureMapSimVector& channels)
  {
    // merge channels into a single feature map (peptide features are appended below)
    SimTypes::FeatureMapSim final_feature_map = mergeProteinIdentificationsMaps_(channels);

    // sequence string of a labeled peptide -> index of its feature in final_feature_map
    std::map<String, Size> peptide_to_feature;

    for (Size i = 0; i < channels.size(); ++i)
    {
      for (SimTypes::FeatureMapSim::iterator it_f_o = channels[i].begin();
           it_f_o != channels[i].end();
           ++it_f_o)
      {
        // derive iTRAQ labeled features from original sequence (might be more than one due to partial labeling)
        SimTypes::FeatureMapSim labeled_features;
        labelPeptide_(*it_f_o, labeled_features);

        for (SimTypes::FeatureMapSim::iterator it_f = labeled_features.begin();
             it_f != labeled_features.end();
             ++it_f)
        {
          const String seq = it_f->getPeptideIdentifications()[0].getHits()[0].getSequence().toString();

          // single lookup: do we already have a feature for this peptide?
          std::map<String, Size>::iterator known = peptide_to_feature.find(seq);
          Size f_index;
          if (known == peptide_to_feature.end())
          {
            // First occurrence: the stored copy already carries this feature's intensity,
            // so it must NOT be added again (doing so counted the first occurrence twice).
            final_feature_map.push_back(*it_f);
            f_index = final_feature_map.size() - 1;
            peptide_to_feature[seq] = f_index;
          }
          else
          {
            // further occurrence: increase the overall intensity of the existing feature
            f_index = known->second;
            final_feature_map[f_index].setIntensity(final_feature_map[f_index].getIntensity() + it_f->getIntensity());
          }

          // add intensity of this channel as metavalue
          final_feature_map[f_index].setMetaValue(getChannelIntensityName(i), it_f->getIntensity());
          mergeProteinAccessions_(final_feature_map[f_index], *it_f);
        }
      }
    }

    // hand back the merged map as the only channel
    channels.clear();
    channels.push_back(final_feature_map);
  }
Ejemplo n.º 4
0
  /// Appends one reporter peak per iTRAQ channel to every MS level 2 spectrum of @p exp.
  ///
  /// For each such spectrum the intensities of all parent features (meta value
  /// 'parent_feature_ids', indices into fm[0]) are obtained from getItraqIntensity_(),
  /// multiplied with the isotope correction matrix and summed up per channel.
  ///
  /// @param fm  Feature maps; exactly one map is expected (checked by precondition)
  /// @param exp Experiment whose MS2 spectra are extended
  void ITRAQLabeler::postRawTandemMSHook(SimTypes::FeatureMapSimVector& fm, SimTypes::MSSimExperiment& exp)
  {
    //std::cout << "Matrix used: \n" << ItraqConstants::translateIsotopeMatrix(itraq_type_, isotope_corrections_) << "\n\n";

    // maximal absolute m/z deviation of a reporter peak
    double rep_shift = param_.getValue("reporter_mass_shift");

    OPENMS_PRECONDITION(fm.size() == 1, "More than one feature map given in ITRAQLabeler::postRawTandemMSHook()!")
    EigenMatrixXdPtr isotope_matrix = convertOpenMSMatrix2EigenMatrixXd(ItraqConstants::translateIsotopeMatrix(itraq_type_, isotope_corrections_));
    Eigen::MatrixXd summed_intensity(ItraqConstants::CHANNEL_COUNT[itraq_type_], 1);

    // channel names, indexed by itraq_type_ below: [0] = 4-plex, [1] = 8-plex
    std::vector<Matrix<Int> > reporter_channels(2);
    reporter_channels[0].setMatrix<4, 1>(ItraqConstants::CHANNELS_FOURPLEX);
    reporter_channels[1].setMatrix<8, 1>(ItraqConstants::CHANNELS_EIGHTPLEX);

    boost::uniform_real<double> unit_dist(0.0, 1.0);

    // add signal...
    for (SimTypes::MSSimExperiment::iterator spec_it = exp.begin(); spec_it != exp.end(); ++spec_it)
    {
      // only tandem spectra receive reporter ions
      if (spec_it->getMSLevel() == 2)
      {
        // start every spectrum with an empty sum
        summed_intensity.setZero();

        // accumulate the signal of all contributing features
        OPENMS_PRECONDITION(spec_it->metaValueExists("parent_feature_ids"), "Meta value 'parent_feature_ids' missing in ITRAQLabeler::postRawTandemMSHook()!")
        IntList parent_ids = spec_it->getMetaValue("parent_feature_ids");
        for (Size k = 0; k < parent_ids.size(); ++k)
        {
          // RT scaled iTRAQ intensities of this parent feature
          EigenMatrixXdPtr channel_intensities = getItraqIntensity_(fm[0][parent_ids[k]], spec_it->getRT());

          // isotope matrix * channel intensities = observed iTRAQ intensities
          Eigen::MatrixXd observed = (*isotope_matrix) * (*channel_intensities);
          summed_intensity += observed;
        }

        // one reporter peak per channel
        for (Int channel = 0; channel < ItraqConstants::CHANNEL_COUNT[itraq_type_]; ++channel)
        {
          // random shift of +-rep_shift around exact position
          double shift = unit_dist(rng_->getTechnicalRng()) * 2 * rep_shift - rep_shift;

          SimTypes::MSSimExperiment::SpectrumType::PeakType reporter_peak;
          reporter_peak.setMZ(reporter_channels[itraq_type_].getValue(channel, 0) + 0.1 + shift);
          reporter_peak.setIntensity(summed_intensity(channel, 0));
          spec_it->push_back(reporter_peak);
        }
      }
    }

  }
Ejemplo n.º 5
0
 /// Verifies that the number of feature maps equals the number of active channels.
 /// Nothing is modified.
 /// @throws Exception::IllegalArgument if the two counts differ
 void ITRAQLabeler::setUpHook(SimTypes::FeatureMapSimVector& features)
 {
   // count the channels flagged as active
   Size n_active = 0;
   ChannelMapType::ConstIterator channel_it = channel_map_.begin();
   while (channel_it != channel_map_.end())
   {
     if (channel_it->second.active)
     {
       ++n_active;
     }
     ++channel_it;
   }

   // one feature map per active channel: all fine
   if (features.size() == n_active)
   {
     return;
   }

   const String message = String("iTRAQ Labeling received wrong number of channels: ") + String(n_active) + " defined, but " + String(features.size()) + " given as FASTA files.";
   throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, message);
 }
Ejemplo n.º 6
0
  /// Checks the channel count and, if parameter "label_proteins" is "true",
  /// applies the channel labels to the protein hits (pre-digest labeling).
  /// @throws Exception::IllegalArgument unless 2 or 3 channels are given
  void ICPLLabeler::setUpHook(SimTypes::FeatureMapSimVector& features)
  {
    // channel check
    const Size n_channels = features.size();
    if (n_channels != 2 && n_channels != 3)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "We currently support only 2- or 3-channel ICPL");
    }

    // protein-labeling (pre-digest-labeling) happens only on request
    const bool label_proteins = (param_.getValue("label_proteins") == "true");
    if (!label_proteins)
    {
      return;
    }

    // light and medium channel are always there
    addLabelToProteinHits_(features[0], light_channel_label_);
    addLabelToProteinHits_(features[1], medium_channel_label_);

    // heavy channel .. if it exists
    if (n_channels == 3)
    {
      addLabelToProteinHits_(features[2], heavy_channel_label_);
    }
  }
Ejemplo n.º 7
0
/// Checks the number of channels and applies the SILAC labels to the protein hits
/// of the medium (second) and, if present, heavy (third) channel.
/// The first channel is not touched here.
///
/// @param features_to_simulate One feature map per channel; 2 or 3 maps are accepted
/// @throws Exception::IllegalArgument if fewer than 2 or more than 3 channels are given
void SILACLabeler::setUpHook(SimTypes::FeatureMapSimVector& features_to_simulate)
{
    // check if we have the correct number of channels
    // (the message names both accepted counts; it used to claim that only 2 channels are supported)
    if (features_to_simulate.size() < 2 || features_to_simulate.size() > 3)
    {
        throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, String(features_to_simulate.size()) + " channel(s) given. We currently support only 2- or 3-channel SILAC. Please provide two or three FASTA files!");
    }

    // label the medium channel (skipped if it carries no protein identifications)
    SimTypes::FeatureMapSim& medium_channel = features_to_simulate[1];
    if (medium_channel.getProteinIdentifications().size() > 0)
    {
        applyLabelToProteinHit_(medium_channel, medium_channel_arginine_label_, medium_channel_lysine_label_);
    }

    // check for third channel and label it with the heavy labels
    if (features_to_simulate.size() == 3)
    {
        SimTypes::FeatureMapSim& heavy_channel = features_to_simulate[2];
        if (heavy_channel.getProteinIdentifications().size() > 0)
        {
            applyLabelToProteinHit_(heavy_channel, heavy_channel_arginine_label_, heavy_channel_lysine_label_);
        }
    }
}
void createTestFeatureMapSimVector_(SimTypes::FeatureMapSimVector& feature_maps, bool add3rd)
{
  feature_maps.clear();

  SimTypes::FeatureMapSim fm1,fm2,fm3;
  ProteinHit prothit1,prothit2,prothit3,prothit4,prothit5,prothit6,prothit7,prothit8,prothit9,prothit10, prothit11, prothit12;

  // create first map
  prothit1.setSequence("AAAAAAAKAAAAA"); // 2 Fragmente AAAAAAAK und AAAAA und kommt in allen Channels vor
  prothit1.setMetaValue("description", "test sequence 1");
  prothit1.setAccession("ACC1");
  prothit1.setMetaValue("intensity", 200.0);

  prothit2.setSequence("CNARCNCNCN"); // 2 Fragmente CNAR und CNCNCN und kommt in allen Channels vor
  prothit2.setMetaValue("description", "test sequence 2");
  prothit2.setAccession("ACC2");
  prothit2.setMetaValue("intensity", 80.0);

  prothit3.setSequence("CNHAADDAAAAA"); // ungelabelt, einzelnes Fragment
  prothit3.setMetaValue("description", "test sequence 3");
  prothit3.setAccession("ACC3");
  prothit3.setMetaValue("intensity", 100.0);

  prothit12.setSequence("VNAAAAAARVNCNCNAAAA"); // Ergebniss: CNAAAAAAR(Label Medium_R) , CNCNCNAAAA (einmal kommt in allen Channels vor)
  prothit12.setMetaValue("description", "test sequence 12");
  prothit12.setAccession("ACC5");
  prothit12.setMetaValue("intensity", 115.0);

  ProteinIdentification protIdent1;
  protIdent1.insertHit(prothit1);
  protIdent1.insertHit(prothit2);
  protIdent1.insertHit(prothit3);
  protIdent1.insertHit(prothit12);
  vector<ProteinIdentification> protIdents_vec1;
  protIdents_vec1.push_back(protIdent1);
  fm1.setProteinIdentifications(protIdents_vec1);

  // create labeled map
  prothit4.setSequence("AAAAAAAKAAAAA"); // Ergbeniss: AAAAAAAK(Label Medium_K) , AAAAA ( einmal kommt in allen Channels vor)
  prothit4.setMetaValue("description", "test sequence 4");
  prothit4.setAccession("ACC4");
  prothit4.setMetaValue("intensity", 50.0);

  prothit5.setSequence("CNARCNCNCN"); // Ergebniss: CNAR(Label Medium_R) , CNCNCN (einmal kommt in allen Channels vor)
  prothit5.setMetaValue("description", "test sequence 5");
  prothit5.setAccession("ACC5");
  prothit5.setMetaValue("intensity", 100.0);

  prothit6.setSequence("LDRCEL"); // Ergbeniss : LDR(label Medium_R) , CEL (einmal kommt in channel 2 und 3 vor)
  prothit6.setMetaValue("description", "test sequence 6");
  prothit6.setAccession("ACC6");
  prothit6.setMetaValue("intensity", 120.0);

  prothit11.setSequence("VNAAAAAARVNCNCNAAAA"); // Ergebniss: CNAAAAAAR(Label Medium_R) , CNCNCNAAAA (einmal kommt in allen Channels vor)
  prothit11.setMetaValue("description", "test sequence 11");
  prothit11.setAccession("ACC5");
  prothit11.setMetaValue("intensity", 110.0);


  ProteinIdentification protIdent2;
  protIdent2.insertHit(prothit4);
  protIdent2.insertHit(prothit5);
  protIdent2.insertHit(prothit6);
  protIdent2.insertHit(prothit11);
  vector<ProteinIdentification> protIdents_vec2;
  protIdents_vec2.push_back(protIdent2);
  fm2.setProteinIdentifications(protIdents_vec2);


  feature_maps.push_back(fm1);
  feature_maps.push_back(fm2);

  if (add3rd)
  {
    prothit7.setSequence("AAAAAAAKAAAAA"); // Ergebniss : AAAAAAAK(Label Heavy_K) , AAAAA ( einmal kommt in allen Channels vor )
    prothit7.setMetaValue("description", "test sequence 7");
    prothit7.setAccession("ACC7");
    prothit7.setMetaValue("intensity", 30.0);

    prothit8.setSequence("CNARCNCNCN"); // Ergebniss: CNAR(Label Heavy_R) , CNCNCN (einmal kommt in allen Channels vor)
    prothit8.setMetaValue("description", "test sequence 8");
    prothit8.setAccession("ACC8");
    prothit8.setMetaValue("intensity", 130.0);

    prothit9.setSequence("LDRCEL"); //Ergebniss: LDR(label Heavy_R) , CEL (einmal kommt in channel 2 und 3 vor)
    prothit9.setMetaValue("description", "test sequence 9");
    prothit9.setAccession("ACC9");
    prothit9.setMetaValue("intensity", 70.0);

    prothit10.setSequence("YCYCY"); //Ergebniss: YCYCY kommt nur in diesem Channel vor
    prothit10.setMetaValue("description", "test sequence 10");
    prothit10.setAccession("ACC10");
    prothit10.setMetaValue("intensity", 80.0);

    ProteinIdentification protIdent3;
    protIdent3.insertHit(prothit7);
    protIdent3.insertHit(prothit8);
    protIdent3.insertHit(prothit9);
    protIdent3.insertHit(prothit10);
    vector<ProteinIdentification> protIdents_vec3;
    protIdents_vec3.push_back(protIdent3);
    fm3.setProteinIdentifications(protIdents_vec3);
    feature_maps.push_back(fm3);
  }

}
Ejemplo n.º 9
0
/// Merges the digested channels (2 or 3) into a single feature map and stores
/// the detected SILAC pairs/triplets as consensus features in consensus_.
///
/// Features of different channels are matched by sequence string: the first
/// channel is indexed by the sequence string of its first peptide hit, the other
/// channels by the result of getUnmodifiedSequence_(). If the matching feature of
/// the higher channel differs from its unmodified sequence, all partners are added
/// to the final map and linked by a ConsensusFeature; otherwise the features are
/// merged into one via mergeFeatures_() / mergeAllChannelFeatures_().
///
/// On return @p features_to_simulate holds exactly one map (the merged one).
///
/// @param features_to_simulate In: one feature map per channel. Out: the single merged map.
void SILACLabeler::postDigestHook(SimTypes::FeatureMapSimVector& features_to_simulate)
{

    // NOTE(review): accesses index 1 unconditionally - assumes at least two channels
    // (setUpHook() rejects any other count than 2 or 3)
    SimTypes::FeatureMapSim& light_channel_features = features_to_simulate[0];
    SimTypes::FeatureMapSim& medium_channel_features = features_to_simulate[1];

    // merge the generated feature maps and create consensus
    SimTypes::FeatureMapSim final_feature_map = mergeProteinIdentificationsMaps_(features_to_simulate);

    // merge two channels
    if (features_to_simulate.size() == 2)
    {
        // index of the unlabeled (first) channel: sequence string -> feature
        Map<String, Feature> unlabeled_features_index;
        for (SimTypes::FeatureMapSim::iterator unlabeled_features_iter = light_channel_features.begin();
                unlabeled_features_iter != light_channel_features.end();
                ++unlabeled_features_iter)
        {
            (*unlabeled_features_iter).ensureUniqueId();
            unlabeled_features_index.insert(std::make_pair(
                                                (*unlabeled_features_iter).getPeptideIdentifications()[0].getHits()[0].getSequence().toString()
                                                ,
                                                *unlabeled_features_iter
                                            ));
        }

        // iterate over second map
        for (SimTypes::FeatureMapSim::iterator labeled_feature_iter = medium_channel_features.begin(); labeled_feature_iter != medium_channel_features.end(); ++labeled_feature_iter)
        {
            const String unmodified_sequence = getUnmodifiedSequence_(*labeled_feature_iter, medium_channel_arginine_label_, medium_channel_lysine_label_);

            // guarantee uniqueness
            (*labeled_feature_iter).ensureUniqueId();

            // check if we have a pair
            if (unlabeled_features_index.has(unmodified_sequence))
            {
                // reference into the index; it is only used before the entry is erased below
                Feature& unlabeled_feature = unlabeled_features_index[unmodified_sequence];
                // guarantee uniqueness
                unlabeled_feature.ensureUniqueId();

                // feature has a SILAC Label and is not equal to non-labeled
                if ((*labeled_feature_iter).getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
                {
                    // add features to final map
                    final_feature_map.push_back(*labeled_feature_iter);
                    final_feature_map.push_back(unlabeled_feature);

                    // create consensus feature
                    ConsensusFeature cf;
                    cf.insert(MEDIUM_FEATURE_MAPID_, *labeled_feature_iter);
                    cf.insert(LIGHT_FEATURE_MAPID_, unlabeled_feature);
                    cf.ensureUniqueId();
                    consensus_.push_back(cf);

                    // remove unlabeled feature
                    unlabeled_features_index.erase(unmodified_sequence);
                }
                else
                {
                    // merge features since they are equal
                    // NOTE(review): unlike the branch above there is no explicit erase() here -
                    // presumably mergeFeatures_() removes the entry from the index; verify
                    Feature final_feature = mergeFeatures_(*labeled_feature_iter, unmodified_sequence, unlabeled_features_index, 1, 2);
                    final_feature_map.push_back(final_feature);
                }
            }
            else // no SILAC pair, just add the labeled one
            {
                final_feature_map.push_back(*labeled_feature_iter);
            }
        }

        // add singletons from unlabeled channel
        // (everything still left in the index)
        for (Map<String, Feature>::iterator unlabeled_index_iter = unlabeled_features_index.begin(); unlabeled_index_iter != unlabeled_features_index.end(); ++unlabeled_index_iter)
        {
            // the single ones from c0
            final_feature_map.push_back(unlabeled_index_iter->second);
        }
    }

    // merge three channels
    if (features_to_simulate.size() == 3)
    {

        // index of the unlabeled (first) channel: sequence string -> feature
        Map<String, Feature> unlabeled_features_index;
        for (SimTypes::FeatureMapSim::iterator unlabeled_features_iter = light_channel_features.begin();
                unlabeled_features_iter != light_channel_features.end();
                ++unlabeled_features_iter)
        {
            (*unlabeled_features_iter).ensureUniqueId();
            unlabeled_features_index.insert(std::make_pair(
                                                (*unlabeled_features_iter).getPeptideIdentifications()[0].getHits()[0].getSequence().toString()
                                                ,
                                                *unlabeled_features_iter
                                            ));
        }

        // index of the medium (second) channel: unmodified sequence -> feature
        Map<String, Feature> medium_features_index;
        for (SimTypes::FeatureMapSim::iterator labeled_features_iter = medium_channel_features.begin();
                labeled_features_iter != medium_channel_features.end();
                ++labeled_features_iter)
        {
            (*labeled_features_iter).ensureUniqueId();
            medium_features_index.insert(std::make_pair(
                                             getUnmodifiedSequence_(*labeled_features_iter, medium_channel_arginine_label_, medium_channel_lysine_label_)
                                             ,
                                             *labeled_features_iter
                                         ));
        }

        // walk over the heavy (third) channel and look for partners in both indices
        SimTypes::FeatureMapSim& heavy_labeled_features = features_to_simulate[2];
        for (SimTypes::FeatureMapSim::iterator heavy_labeled_feature_iter = heavy_labeled_features.begin();
                heavy_labeled_feature_iter != heavy_labeled_features.end();
                ++heavy_labeled_feature_iter)
        {

            Feature& heavy_feature = *heavy_labeled_feature_iter;
            heavy_feature.ensureUniqueId();

            String heavy_feature_unmodified_sequence = getUnmodifiedSequence_(heavy_feature, heavy_channel_arginine_label_, heavy_channel_lysine_label_);

            if (unlabeled_features_index.has(heavy_feature_unmodified_sequence) && medium_features_index.has(heavy_feature_unmodified_sequence))
            {
                // 1st case: it is a triplet
                // heavy sequence differs from its unmodified form, i.e. it carries a label
                if (heavy_feature_unmodified_sequence != heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toString())
                {
                    // add features to final map
                    final_feature_map.push_back(heavy_feature);
                    final_feature_map.push_back(medium_features_index[heavy_feature_unmodified_sequence]);
                    final_feature_map.push_back(unlabeled_features_index[heavy_feature_unmodified_sequence]);

                    ConsensusFeature c_triplet;
                    c_triplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
                    c_triplet.insert(LIGHT_FEATURE_MAPID_, unlabeled_features_index[heavy_feature_unmodified_sequence]);
                    c_triplet.insert(MEDIUM_FEATURE_MAPID_, medium_features_index[heavy_feature_unmodified_sequence]);
                    c_triplet.ensureUniqueId();

                    consensus_.push_back(c_triplet);
                }
                else
                {
                    // merge all three channels
                    Feature completeMerge = mergeAllChannelFeatures_(heavy_feature, heavy_feature_unmodified_sequence, unlabeled_features_index, medium_features_index);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                unlabeled_features_index.erase(heavy_feature_unmodified_sequence);
                medium_features_index.erase(heavy_feature_unmodified_sequence);
            }
            else if (unlabeled_features_index.has(heavy_feature_unmodified_sequence))
            {
                // 2nd case light and heavy pair
                if (heavy_feature_unmodified_sequence != heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toString())
                {
                    // add features to final map
                    final_feature_map.push_back(heavy_feature);
                    final_feature_map.push_back(unlabeled_features_index[heavy_feature_unmodified_sequence]);

                    ConsensusFeature c_duplet;
                    c_duplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
                    c_duplet.insert(LIGHT_FEATURE_MAPID_, unlabeled_features_index[heavy_feature_unmodified_sequence]);
                    c_duplet.ensureUniqueId();

                    consensus_.push_back(c_duplet);
                }
                else
                {
                    // merge heavy feature with its partner from the unlabeled index (channel ids 1 and 3 are passed)
                    Feature completeMerge = mergeFeatures_(heavy_feature, heavy_feature_unmodified_sequence, unlabeled_features_index, 1, 3);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                unlabeled_features_index.erase(heavy_feature_unmodified_sequence);
            }
            else if (medium_features_index.has(heavy_feature_unmodified_sequence))
            {
                // 3rd case medium and heavy pair
                if (heavy_feature_unmodified_sequence != heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toString())
                {
                    // add features to final map
                    final_feature_map.push_back(heavy_feature);
                    final_feature_map.push_back(medium_features_index[heavy_feature_unmodified_sequence]);

                    ConsensusFeature c_duplet;
                    c_duplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
                    c_duplet.insert(MEDIUM_FEATURE_MAPID_, medium_features_index[heavy_feature_unmodified_sequence]);
                    c_duplet.ensureUniqueId();

                    consensus_.push_back(c_duplet);
                }
                else
                {
                    // merge heavy feature with its partner from the medium index (channel ids 2 and 3 are passed)
                    Feature completeMerge = mergeFeatures_(heavy_feature, heavy_feature_unmodified_sequence, medium_features_index, 2, 3);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                medium_features_index.erase(heavy_feature_unmodified_sequence);
            }
            else
            {
                // heavy feature is a singleton
                final_feature_map.push_back(heavy_feature);
            }
        }

        // handle the medium channel features that found no heavy partner
        // (everything still left in the medium index)
        for (Map<String, Feature>::iterator medium_channle_index_iterator = medium_features_index.begin(); medium_channle_index_iterator != medium_features_index.end(); ++medium_channle_index_iterator)
        {
            Feature& medium_channel_feature = medium_channle_index_iterator->second;
            medium_channel_feature.ensureUniqueId();

            String medium_channel_feature_unmodified_sequence = getUnmodifiedSequence_(medium_channel_feature, medium_channel_arginine_label_, medium_channel_lysine_label_);

            if (unlabeled_features_index.has(medium_channel_feature_unmodified_sequence))
            {
                // 1. case: pair between c0 and c1
                if (medium_channel_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
                {
                    // add features to final map
                    final_feature_map.push_back(medium_channel_feature);
                    final_feature_map.push_back(unlabeled_features_index[medium_channel_feature_unmodified_sequence]);

                    ConsensusFeature c_duplet;
                    c_duplet.insert(MEDIUM_FEATURE_MAPID_, medium_channel_feature);
                    c_duplet.insert(LIGHT_FEATURE_MAPID_, unlabeled_features_index[medium_channel_feature_unmodified_sequence]);
                    c_duplet.ensureUniqueId();
                    consensus_.push_back(c_duplet);
                }
                else
                {
                    // merge medium feature with its partner from the unlabeled index (channel ids 1 and 2 are passed)
                    Feature completeMerge = mergeFeatures_(medium_channel_feature, medium_channel_feature_unmodified_sequence, unlabeled_features_index, 1, 2);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                unlabeled_features_index.erase(medium_channel_feature_unmodified_sequence);
            }
            else
            {
                // c1 is alone
                final_feature_map.push_back(medium_channel_feature);
            }

        }

        // add singletons from unlabeled channel
        // (everything still left in the unlabeled index)
        for (Map<String, Feature>::iterator unlabeled_index_iter = unlabeled_features_index.begin(); unlabeled_index_iter != unlabeled_features_index.end(); ++unlabeled_index_iter)
        {
            // the single ones from c0
            final_feature_map.push_back(unlabeled_index_iter->second);
        }
    }

    // replace the input channels by the single merged map
    features_to_simulate.clear();
    features_to_simulate.push_back(final_feature_map);

    consensus_.setProteinIdentifications(final_feature_map.getProteinIdentifications());
    ConsensusMap::FileDescription map_description;
    map_description.label = "Simulation (Labeling Consensus)";
    // NOTE(review): features_to_simulate holds exactly one map at this point, so size is always 1 -
    // possibly final_feature_map.size() (number of features) was intended; confirm
    map_description.size = features_to_simulate.size();
    // NOTE(review): only file description 0 is set, while the consensus features above are
    // inserted under LIGHT/MEDIUM/HEAVY_FEATURE_MAPID_ - verify that this is intended
    consensus_.getFileDescriptions()[0] = map_description;
}
Ejemplo n.º 10
0
  void ICPLLabeler::postDigestHook(SimTypes::FeatureMapSimVector& features_to_simulate)
  {
    SimTypes::FeatureMapSim& light_labeled_features = features_to_simulate[0];
    SimTypes::FeatureMapSim& medium_labeled_features = features_to_simulate[1];

    if (param_.getValue("label_proteins") == "false") // loop for peptide-labeling (post-digest-labeling)
    {
      // iterate over first map for light labeling
      for (SimTypes::FeatureMapSim::iterator lf_iter = light_labeled_features.begin(); lf_iter != light_labeled_features.end(); ++lf_iter)
      {
        lf_iter->ensureUniqueId();
        addModificationToPeptideHit_(*lf_iter, light_channel_label_);
      }

      // iterate over second map for medium labeling
      for (SimTypes::FeatureMapSim::iterator lf_iter = medium_labeled_features.begin(); lf_iter != medium_labeled_features.end(); ++lf_iter)
      {
        lf_iter->ensureUniqueId();
        addModificationToPeptideHit_(*lf_iter, medium_channel_label_);
      }

      if (features_to_simulate.size() == 3) //third channel labeling can only be done, if a third channel exist
      {
        SimTypes::FeatureMapSim& heavy_labeled_features = features_to_simulate[2];

        // iterate over third map
        for (SimTypes::FeatureMapSim::iterator lf_iter = heavy_labeled_features.begin(); lf_iter != heavy_labeled_features.end(); ++lf_iter)
        {
          lf_iter->ensureUniqueId();
          addModificationToPeptideHit_(*lf_iter, heavy_channel_label_);
        }
      }
    }

    // merge the generated feature maps and create consensus
    SimTypes::FeatureMapSim final_feature_map = mergeProteinIdentificationsMaps_(features_to_simulate);

    if (features_to_simulate.size() == 2) // merge_modus for two FeatureMaps
    {
      // create index of light channel features for easy mapping of medium-to-light channel
      Map<String, Feature> light_labeled_features_index;
      for (SimTypes::FeatureMapSim::iterator light_labeled_features_iter = light_labeled_features.begin();
           light_labeled_features_iter != light_labeled_features.end();
           ++light_labeled_features_iter)
      {
        (*light_labeled_features_iter).ensureUniqueId();
        light_labeled_features_index.insert(std::make_pair(
                                              getUnmodifiedAASequence_((*light_labeled_features_iter), light_channel_label_),
                                              *light_labeled_features_iter
                                              ));
      }

      // iterate over second map
      for (SimTypes::FeatureMapSim::iterator medium_labeled_feature_iter = medium_labeled_features.begin(); medium_labeled_feature_iter != medium_labeled_features.end(); ++medium_labeled_feature_iter)
      {
        AASequence medium_labeled_feature_sequence = (*medium_labeled_feature_iter).getPeptideIdentifications()[0].getHits()[0].getSequence();

        // guarantee uniqueness
        (*medium_labeled_feature_iter).ensureUniqueId();

        // check if we have a pair
        if (light_labeled_features_index.has(getUnmodifiedAASequence_((*medium_labeled_feature_iter), medium_channel_label_)))
        {
          // own scope as we don't know what happens to 'f_modified' once we call erase() below
          Feature& light_labeled_feature = light_labeled_features_index[getUnmodifiedAASequence_((*medium_labeled_feature_iter), medium_channel_label_)];
          // guarantee uniqueness
          light_labeled_feature.ensureUniqueId();

          if (medium_labeled_feature_sequence.isModified()) // feature has a medium ICPL-Label and is not equal to light-labeled
          {
            // add features to final map
            final_feature_map.push_back(*medium_labeled_feature_iter);
            final_feature_map.push_back(light_labeled_feature);

            // create consensus feature
            ConsensusFeature cf;
            cf.insert(MEDIUM_FEATURE_MAPID_, *medium_labeled_feature_iter);
            cf.insert(LIGHT_FEATURE_MAPID_, light_labeled_feature);

            consensus_.push_back(cf);

            // remove light-labeled feature
            light_labeled_features_index.erase(getUnmodifiedAASequence_((*medium_labeled_feature_iter), medium_channel_label_));
          }
          else
          {
            // merge features since they are equal
            Feature final_feature = mergeFeatures_(*medium_labeled_feature_iter, medium_labeled_feature_sequence, light_labeled_features_index);
            final_feature_map.push_back(final_feature);
          }
        }
        else // no ICPL pair, just add the medium-labeled one
        {
          final_feature_map.push_back(*medium_labeled_feature_iter);
        }
      }

      // add singletons from light-labeled channel
      // clean up light-labeled_index
      for (Map<String, Feature>::iterator light_labeled_index_iter = light_labeled_features_index.begin(); light_labeled_index_iter != light_labeled_features_index.end(); ++light_labeled_index_iter)
      {
        // the single ones from c0
        final_feature_map.push_back(light_labeled_index_iter->second);
      }
    }
    else if (features_to_simulate.size() == 3) // merge_modus for three Channels
    {
      // create index of light channel features for easy mapping of heavy-to-medium-to-light channel
      Map<String, Feature> light_labeled_features_index;
      for (SimTypes::FeatureMapSim::iterator light_labeled_features_iter = light_labeled_features.begin();
           light_labeled_features_iter != light_labeled_features.end();
           ++light_labeled_features_iter)
      {
        (*light_labeled_features_iter).ensureUniqueId();
        light_labeled_features_index.insert(std::make_pair(
                                              getUnmodifiedAASequence_(*light_labeled_features_iter, light_channel_label_),
                                              *light_labeled_features_iter
                                              ));
      }

      // create index of medium channel features for easy mapping of heavy-to-medium-to-light channel
      Map<String, Feature> medium_labeled_features_index;
      for (SimTypes::FeatureMapSim::iterator medium_labeled_features_iter = medium_labeled_features.begin();
           medium_labeled_features_iter != medium_labeled_features.end();
           ++medium_labeled_features_iter)
      {
        (*medium_labeled_features_iter).ensureUniqueId();
        medium_labeled_features_index.insert(std::make_pair(
                                               getUnmodifiedAASequence_((*medium_labeled_features_iter), medium_channel_label_),
                                               *medium_labeled_features_iter
                                               ));
      }

      for (SimTypes::FeatureMapSim::iterator heavy_labeled_feature_iter = features_to_simulate[2].begin(); heavy_labeled_feature_iter != features_to_simulate[2].end(); ++heavy_labeled_feature_iter)
      {
        Feature& heavy_feature = *heavy_labeled_feature_iter;
        String heavy_feature_unmodified_sequence = getUnmodifiedAASequence_(heavy_feature, heavy_channel_label_);
        heavy_feature.ensureUniqueId();

        if (light_labeled_features_index.has(heavy_feature_unmodified_sequence) && medium_labeled_features_index.has(heavy_feature_unmodified_sequence))
        {
          // 1st case .. it is a triplet
          if (heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // if heavy feature is modified, all light and medium channel are also

            // add features to final map
            final_feature_map.push_back(heavy_feature);
            final_feature_map.push_back(medium_labeled_features_index[heavy_feature_unmodified_sequence]);
            final_feature_map.push_back(light_labeled_features_index[heavy_feature_unmodified_sequence]);

            // create triplet consensus feature
            ConsensusFeature c_triplet;
            c_triplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
            c_triplet.insert(LIGHT_FEATURE_MAPID_, light_labeled_features_index[heavy_feature_unmodified_sequence]);
            c_triplet.insert(MEDIUM_FEATURE_MAPID_, medium_labeled_features_index[heavy_feature_unmodified_sequence]);

            consensus_.push_back(c_triplet);
          }
          else
          {
            // merge all three channels
            Feature c2c1 = mergeFeatures_(heavy_feature, AASequence::fromString(heavy_feature_unmodified_sequence), medium_labeled_features_index);
            Feature completeMerge = mergeFeatures_(c2c1, AASequence::fromString(heavy_feature_unmodified_sequence), light_labeled_features_index);

            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          light_labeled_features_index.erase(heavy_feature_unmodified_sequence);
          medium_labeled_features_index.erase(heavy_feature_unmodified_sequence);
        }
        else if (light_labeled_features_index.has(heavy_feature_unmodified_sequence))
        {
          // 2nd case: pair between c0 and c2
          if (heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // add features to final map
            final_feature_map.push_back(heavy_feature);
            final_feature_map.push_back(light_labeled_features_index[heavy_feature_unmodified_sequence]);

            ConsensusFeature c_triplet;
            c_triplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
            c_triplet.insert(LIGHT_FEATURE_MAPID_, light_labeled_features_index[heavy_feature_unmodified_sequence]);

            consensus_.push_back(c_triplet);
          }
          else
          {
            // merge heavy and light channel (no medium-labeled partner in the index)
            Feature completeMerge = mergeFeatures_(heavy_feature, AASequence::fromString(heavy_feature_unmodified_sequence), light_labeled_features_index);
            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          light_labeled_features_index.erase(heavy_feature_unmodified_sequence);
        }
        else if (medium_labeled_features_index.has(heavy_feature_unmodified_sequence))
        {
          // 3rd case: pair between c1 and c2
          if (heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // add features to final map
            final_feature_map.push_back(heavy_feature);
            final_feature_map.push_back(medium_labeled_features_index[heavy_feature_unmodified_sequence]);

            ConsensusFeature c_triplet;
            c_triplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
            c_triplet.insert(MEDIUM_FEATURE_MAPID_, medium_labeled_features_index[heavy_feature_unmodified_sequence]);

            consensus_.push_back(c_triplet);
          }
          else
          {
            // merge heavy and medium channel (no light-labeled partner in the index)
            Feature completeMerge = mergeFeatures_(heavy_feature, AASequence::fromString(heavy_feature_unmodified_sequence), medium_labeled_features_index);
            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          medium_labeled_features_index.erase(heavy_feature_unmodified_sequence);
        }
        else
        {
          // 4th case: c2 is alone
          final_feature_map.push_back(heavy_feature);
        }
      }

      // clean up medium-labeled_index
      for (Map<String, Feature>::iterator medium_labeled_index_iter = medium_labeled_features_index.begin(); medium_labeled_index_iter != medium_labeled_features_index.end(); ++medium_labeled_index_iter)
      {
        Feature& medium_labeled_feature = medium_labeled_index_iter->second;
        medium_labeled_feature.ensureUniqueId();

        String medium_labeled_feature_unmodified_sequence = medium_labeled_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString();

        if (light_labeled_features_index.has(medium_labeled_feature_unmodified_sequence))
        {
          // 1. case: pair between c0 and c1
          if (medium_labeled_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // add features to final map
            final_feature_map.push_back(medium_labeled_feature);
            final_feature_map.push_back(light_labeled_features_index[medium_labeled_feature_unmodified_sequence]);

            ConsensusFeature c_triplet;
            c_triplet.insert(MEDIUM_FEATURE_MAPID_, medium_labeled_feature);
            c_triplet.insert(LIGHT_FEATURE_MAPID_, light_labeled_features_index[medium_labeled_feature_unmodified_sequence]);

            consensus_.push_back(c_triplet);
          }
          else
          {
            // merge
            Feature completeMerge = mergeFeatures_(medium_labeled_feature, AASequence::fromString(medium_labeled_feature_unmodified_sequence), light_labeled_features_index);
            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          light_labeled_features_index.erase(medium_labeled_feature_unmodified_sequence);
        }
        else
        {
          // c1 is alone
          final_feature_map.push_back(medium_labeled_feature);
        }
      }

      // clean up light-labeled_index
      for (Map<String, Feature>::iterator light_labeled_index_iter = light_labeled_features_index.begin(); light_labeled_index_iter != light_labeled_features_index.end(); ++light_labeled_index_iter)
      {
        // the single ones from c0
        final_feature_map.push_back(light_labeled_index_iter->second);
      }
    }

    features_to_simulate.clear();
    features_to_simulate.push_back(final_feature_map);

    consensus_.setProteinIdentifications(final_feature_map.getProteinIdentifications());
    ConsensusMap::FileDescription map_description;
    map_description.label = "Simulation (Labeling Consensus)";
    map_description.size = features_to_simulate.size();
    consensus_.getFileDescriptions()[0] = map_description;
  }