Example No. 1
ConsensusFeature getCFWithIntensites(double v[])
{
  ConsensusFeature cf;
  BaseFeature bf0, bf1, bf2, bf3;
  bf0.setIntensity(v[0]);
  bf1.setIntensity(v[1]);
  bf2.setIntensity(v[2]);
  bf3.setIntensity(v[3]);
  cf.insert(0, bf0);
  cf.insert(1, bf1);
  cf.insert(2, bf2);
  cf.insert(3, bf3);
  cf.setIntensity(v[0]+v[1]+v[2]+v[3]);
  return cf;
}
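A minimal usage sketch for the helper above (hedged: the wrapper name and the intensity values are illustrative and not part of the original example; the usual OpenMS and STL headers are assumed to be included):

void exampleGetCFWithIntensites()
{
  // four arbitrary sub-feature intensities
  double v[4] = {10.0, 20.0, 30.0, 40.0};
  ConsensusFeature cf = getCFWithIntensites(v);
  // the helper inserted four handles and set the consensus intensity to their sum (100.0)
  std::cout << cf.getIntensity() << " from " << cf.getFeatures().size()
            << " sub-features" << std::endl;
}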
  void FeatureGroupingAlgorithm::transferSubelements(const vector<ConsensusMap>& maps, ConsensusMap& out) const
  {
    // accumulate file descriptions from the input maps:
    // cout << "Updating file descriptions..." << endl;
    out.getFileDescriptions().clear();
    // mapping: (map index, original id) -> new id
    map<pair<Size, UInt64>, Size> mapid_table;
    for (Size i = 0; i < maps.size(); ++i)
    {
      const ConsensusMap& consensus = maps[i];
      for (ConsensusMap::FileDescriptions::const_iterator desc_it = consensus.getFileDescriptions().begin(); desc_it != consensus.getFileDescriptions().end(); ++desc_it)
      {
        Size counter = mapid_table.size();
        mapid_table[make_pair(i, desc_it->first)] = counter;
        out.getFileDescriptions()[counter] = desc_it->second;
      }
    }

    // look-up table: input map -> unique ID -> consensus feature
    // cout << "Creating look-up table..." << endl;
    vector<map<UInt64, ConsensusMap::ConstIterator> > feat_lookup(maps.size());
    for (Size i = 0; i < maps.size(); ++i)
    {
      const ConsensusMap& consensus = maps[i];
      for (ConsensusMap::ConstIterator feat_it = consensus.begin();
           feat_it != consensus.end(); ++feat_it)
      {
        // do NOT use "id_lookup[i][feat_it->getUniqueId()] = feat_it;" here as
        // you will get "attempt to copy-construct an iterator from a singular
        // iterator" in STL debug mode:
        feat_lookup[i].insert(make_pair(feat_it->getUniqueId(), feat_it));
      }
    }
    // adjust the consensus features:
    // cout << "Adjusting consensus features..." << endl;
    for (ConsensusMap::iterator cons_it = out.begin(); cons_it != out.end(); ++cons_it)
    {
      ConsensusFeature adjusted = ConsensusFeature(
        static_cast<BaseFeature>(*cons_it)); // remove sub-features
      for (ConsensusFeature::HandleSetType::const_iterator sub_it = cons_it->getFeatures().begin(); sub_it != cons_it->getFeatures().end(); ++sub_it)
      {
        UInt64 id = sub_it->getUniqueId();
        Size map_index = sub_it->getMapIndex();
        ConsensusMap::ConstIterator origin = feat_lookup[map_index][id];
        for (ConsensusFeature::HandleSetType::const_iterator handle_it = origin->getFeatures().begin(); handle_it != origin->getFeatures().end(); ++handle_it)
        {
          FeatureHandle handle = *handle_it;
          Size new_id = mapid_table[make_pair(map_index, handle.getMapIndex())];
          handle.setMapIndex(new_id);
          adjusted.insert(handle);
        }
      }
      *cons_it = adjusted;
    }
  }
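A hedged usage sketch for transferSubelements() (not part of the original source): the wrapper name is hypothetical, and 'algo' stands for any concrete FeatureGroupingAlgorithm subclass. The expected call pattern is to link the consensus maps first and then re-point the handles of the linked map at the original sub-features.

  void relinkToOriginalSubfeatures(const FeatureGroupingAlgorithm& algo,
                                   const std::vector<ConsensusMap>& input_maps,
                                   ConsensusMap& grouped)
  {
    // 'grouped' is assumed to be the result of linking 'input_maps', so its
    // handles currently reference whole input consensus features; after this
    // call they reference the original sub-features, with map indices
    // renumbered via the accumulated file descriptions.
    algo.transferSubelements(input_maps, grouped);
  }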
Example No. 3
  ExitCodes main_(int, const char**) override
  {

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    String in = getStringOption_("in");
    String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = FileHandler().getTypeByFileName(out);
    }

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    MzMLFile mz_data_file;
    mz_data_file.setLogType(log_type_);
    PeakMap ms_peakmap;
    std::vector<Int> ms_level(1, 1);
    (mz_data_file.getOptions()).setMSLevels(ms_level);
    mz_data_file.load(in, ms_peakmap);

    if (ms_peakmap.size() == 0)
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.";
      return INCOMPATIBLE_INPUT_DATA;
    }

    // make sure that the spectra are sorted by m/z
    ms_peakmap.sortSpectra(true);

    //-------------------------------------------------------------
    // get params for MTD and EPD algorithms
    //-------------------------------------------------------------
    Param com_param = getParam_().copy("algorithm:common:", true);
    writeDebug_("Common parameters passed to both sub-algorithms (mtd and epd)", com_param, 3);

    Param mtd_param = getParam_().copy("algorithm:mtd:", true);
    writeDebug_("Parameters passed to MassTraceDetection", mtd_param, 3);

    Param epd_param = getParam_().copy("algorithm:epd:", true);
    writeDebug_("Parameters passed to ElutionPeakDetection", epd_param, 3);


    //-------------------------------------------------------------
    // configure and run MTD
    //-------------------------------------------------------------

    MassTraceDetection mt_ext;
    mtd_param.insert("", com_param);
    mtd_param.remove("chrom_fwhm");
    mt_ext.setParameters(mtd_param);
    vector<MassTrace> m_traces;
    mt_ext.run(ms_peakmap, m_traces);

    vector<MassTrace> m_traces_final;

    bool use_epd = epd_param.getValue("enabled").toBool();

    if (!use_epd)
    {
      swap(m_traces_final, m_traces);
    }
    else
    {
      ElutionPeakDetection ep_det;

      epd_param.remove("enabled"); // artificially added above
      epd_param.insert("", com_param);

      ep_det.setParameters(epd_param);

      std::vector<MassTrace> split_mtraces;
      // note: this step will destroy any meta data annotation (e.g. FWHM_mz_avg)
      ep_det.detectPeaks(m_traces, split_mtraces);

      if (ep_det.getParameters().getValue("width_filtering") == "auto")
      {
        m_traces_final.clear();
        ep_det.filterByPeakWidth(split_mtraces, m_traces_final);

        LOG_INFO << "Notice: " << split_mtraces.size() - m_traces_final.size()
                 << " of total " << split_mtraces.size() 
                 << " were dropped because of too low peak width." << std::endl;
      }
      else
      {
        swap(m_traces_final, split_mtraces);
      }
    }

    //-------------------------------------------------------------
    // writing consensus map output
    //-------------------------------------------------------------
    if (out_type == FileTypes::CONSENSUSXML)
    {
      ConsensusMap consensus_map;
      StringList ms_runs;
      ms_peakmap.getPrimaryMSRunPath(ms_runs);
      consensus_map.setPrimaryMSRunPath(ms_runs);

      for (Size i = 0; i < m_traces_final.size(); ++i)
      {
        if (m_traces_final[i].getSize() == 0) continue;

        ConsensusFeature fcons;
        int k = 0;
        for (MassTrace::const_iterator it = m_traces_final[i].begin(); it != m_traces_final[i].end(); ++it)
        {
          FeatureHandle fhandle;
          fhandle.setRT(it->getRT());
          fhandle.setMZ(it->getMZ());
          fhandle.setIntensity(it->getIntensity());
          fhandle.setUniqueId(++k);
          fcons.insert(fhandle);
        }

        fcons.setMetaValue(3, m_traces_final[i].getLabel());
        fcons.setCharge(0);
        fcons.setWidth(m_traces_final[i].estimateFWHM(use_epd));
        fcons.setQuality(1 - (1.0 / m_traces_final[i].getSize()));

        fcons.setRT(m_traces_final[i].getCentroidRT());
        fcons.setMZ(m_traces_final[i].getCentroidMZ());
        fcons.setIntensity(m_traces_final[i].getIntensity(false));
        consensus_map.push_back(fcons);
      }
      consensus_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);
      addDataProcessing_(consensus_map, getProcessingInfo_(DataProcessing::QUANTITATION));
      consensus_map.setUniqueId();
      ConsensusXMLFile().store(out, consensus_map);

    }
    else //(out_type == FileTypes::FEATUREXML)
    {

      //-----------------------------------------------------------
      // convert mass traces to features
      //-----------------------------------------------------------

      std::vector<double> stats_sd;
      FeatureMap ms_feat_map;
      StringList ms_runs;
      ms_peakmap.getPrimaryMSRunPath(ms_runs);
      ms_feat_map.setPrimaryMSRunPath(ms_runs);
      for (Size i = 0; i < m_traces_final.size(); ++i)
      {
        if (m_traces_final[i].getSize() == 0) continue;

        m_traces_final[i].updateMeanMZ();
        m_traces_final[i].updateWeightedMZsd();

        Feature f;
        f.setMetaValue(3, m_traces_final[i].getLabel());
        f.setCharge(0);
        f.setMZ(m_traces_final[i].getCentroidMZ());
        f.setIntensity(m_traces_final[i].getIntensity(false));
        f.setRT(m_traces_final[i].getCentroidRT());
        f.setWidth(m_traces_final[i].estimateFWHM(use_epd));
        f.setOverallQuality(1 - (1.0 / m_traces_final[i].getSize()));
        f.getConvexHulls().push_back(m_traces_final[i].getConvexhull());
        double sd = m_traces_final[i].getCentroidSD();
        f.setMetaValue("SD", sd);
        f.setMetaValue("SD_ppm", sd / f.getMZ() * 1e6);
        if (m_traces_final[i].fwhm_mz_avg > 0) f.setMetaValue("FWHM_mz_avg", m_traces_final[i].fwhm_mz_avg);
        stats_sd.push_back(m_traces_final[i].getCentroidSD());
        ms_feat_map.push_back(f);
      }

      // print some stats about standard deviation of mass traces
      if (stats_sd.size() > 0)
      {
        std::sort(stats_sd.begin(), stats_sd.end());
        LOG_INFO << "Mass trace m/z s.d.\n"
                 << "    low quartile: " << stats_sd[stats_sd.size() * 1 / 4] << "\n"
                 << "          median: " << stats_sd[stats_sd.size() * 1 / 2] << "\n"
                 << "    upp quartile: " << stats_sd[stats_sd.size() * 3 / 4] << std::endl;
      }


      ms_feat_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info TODO
      addDataProcessing_(ms_feat_map, getProcessingInfo_(DataProcessing::QUANTITATION));
      //ms_feat_map.setUniqueId();

      FeatureXMLFile().store(out, ms_feat_map);
    }

    return EXECUTION_OK;
  }
  void IsobaricChannelExtractor::extractChannels(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map)
  {
    if (ms_exp_data.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.\n";
      throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!");
    }

    // clear the output map
    consensus_map.clear(false);
    consensus_map.setExperimentType("labeled_MS2");

    // create predicate for spectrum checking
    LOG_INFO << "Selecting scans with activation mode: " << (selected_activation_ == "" ? "any" : selected_activation_) << "\n";
    HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(StringList::create(selected_activation_));

    // now we have picked data
    // --> assign peaks to channels
    UInt64 element_index(0);

    // remember the current precursor spectrum
    MSExperiment<Peak1D>::ConstIterator prec_spec = ms_exp_data.end();

    for (MSExperiment<Peak1D>::ConstIterator it = ms_exp_data.begin(); it != ms_exp_data.end(); ++it)
    {
      // remember the last MS1 spectrum as we assume it to be the precursor spectrum
      if (it->getMSLevel() ==  1) prec_spec = it;

      if (selected_activation_ == "" || activation_predicate(*it))
      {
        // check if precursor is available
        if (it->getPrecursors().empty())
        {
          throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + it->getNativeID() + " with RT " + String(it->getRT()));
        }

        // check precursor constraints
        if (!isValidPrecursor_(it->getPrecursors()[0]))
        {
          LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor doesn't fulfill all constraints." << std::endl;
          continue;
        }

        // check precursor purity if we have a valid precursor
        if (prec_spec != ms_exp_data.end())
        {
          const DoubleReal purity = computePrecursorPurity_(it, prec_spec);
          if (purity < min_precursor_purity_)
          {
            LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold. [purity = " << purity << "]" << std::endl;
            continue;
          }
        }
        else
        {
          LOG_INFO << "No precursor available for spectrum: " << it->getNativeID() << std::endl;
        }

        // store RT&MZ of parent ion as centroid of ConsensusFeature
        ConsensusFeature cf;
        cf.setUniqueId();
        cf.setRT(it->getRT());
        cf.setMZ(it->getPrecursors()[0].getMZ());

        Peak2D channel_value;
        channel_value.setRT(it->getRT());
        // for each channel
        UInt64 map_index = 0;
        Peak2D::IntensityType overall_intensity = 0;
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin();
             cl_it != quant_method_->getChannelInformation().end();
             ++cl_it)
        {
          // set mz-position of channel
          channel_value.setMZ(cl_it->center);
          // reset intensity
          channel_value.setIntensity(0);

          // as every evaluation requires time, we cache the MZEnd iterator
          const MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_end = it->MZEnd(cl_it->center + reporter_mass_shift_);

          // add up all signals
          for (MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_it = it->MZBegin(cl_it->center - reporter_mass_shift_);
               mz_it != mz_end;
               ++mz_it)
          {
            channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity());
          }

          // discard contribution of this channel as it is below the required intensity threshold
          if (channel_value.getIntensity() < min_reporter_intensity_)
          {
            channel_value.setIntensity(0);
          }

          overall_intensity += channel_value.getIntensity();
          // add channel to ConsensusFeature
          cf.insert(map_index++, channel_value, element_index);
        } // ! channel_iterator

        // check if we keep this feature or if it contains low-intensity quantifications
        if (remove_low_intensity_quantifications_ && hasLowIntensityReporter_(cf))
        {
          continue;
        }

        // check featureHandles are not empty
        if (overall_intensity == 0)
        {
          cf.setMetaValue("all_empty", String("true"));
        }
        cf.setIntensity(overall_intensity);
        consensus_map.push_back(cf);

        // count the tandem scans in the order they appear in the experiment
        ++element_index;
      }
    } // ! Experiment iterator

    /// add meta information to the map
    registerChannelsInOutputMap_(consensus_map);
  }
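The core step of extractChannels() is summing all peak intensities inside a small m/z window around each reporter mass via MZBegin()/MZEnd(). A stand-alone sketch of that step (hedged: the function name is hypothetical and the window parameters are supplied by the caller, not taken from the quantitation method):

  double sumReporterWindow(const MSExperiment<Peak1D>::SpectrumType& spectrum,
                           double center, double shift)
  {
    double summed = 0.0;
    // cache the end iterator of the window [center - shift, center + shift]
    const MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_end = spectrum.MZEnd(center + shift);
    for (MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_it = spectrum.MZBegin(center - shift);
         mz_it != mz_end; ++mz_it)
    {
      summed += mz_it->getIntensity();
    }
    return summed;
  }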
Example No. 5
  void SimplePairFinder::run(const std::vector<ConsensusMap> & input_maps, ConsensusMap & result_map)
  {
    if (input_maps.size() != 2)
      throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "exactly two input maps required");
    checkIds_(input_maps);

    // progress dots
    Int progress_dots = 0;
    if (this->param_.exists("debug:progress_dots"))
    {
      progress_dots = (Int) this->param_.getValue("debug:progress_dots");
    }
    Int number_of_considered_element_pairs = 0;

    // For each element in map 0, find its best friend in map 1
    std::vector<UInt> best_companion_index_0(input_maps[0].size(), UInt(-1));
    std::vector<double> best_companion_quality_0(input_maps[0].size(), 0);
    for (UInt fi0 = 0; fi0 < input_maps[0].size(); ++fi0)
    {
      double best_quality = -std::numeric_limits<double>::max();
      for (UInt fi1 = 0; fi1 < input_maps[1].size(); ++fi1)
      {
        double quality = similarity_(input_maps[0][fi0], input_maps[1][fi1]);
        if (quality > best_quality)
        {
          best_quality = quality;
          best_companion_index_0[fi0] = fi1;
        }

        ++number_of_considered_element_pairs;
        if (progress_dots && !(number_of_considered_element_pairs % progress_dots))
        {
          std::cout << '-' << std::flush;
        }

      }
      best_companion_quality_0[fi0] = best_quality;
    }

    // For each element in map 1, find its best friend in map 0
    std::vector<UInt> best_companion_index_1(input_maps[1].size(), UInt(-1));
    std::vector<double> best_companion_quality_1(input_maps[1].size(), 0);
    for (UInt fi1 = 0; fi1 < input_maps[1].size(); ++fi1)
    {
      double best_quality = -std::numeric_limits<double>::max();
      for (UInt fi0 = 0; fi0 < input_maps[0].size(); ++fi0)
      {
        double quality = similarity_(input_maps[0][fi0], input_maps[1][fi1]);
        if (quality > best_quality)
        {
          best_quality = quality;
          best_companion_index_1[fi1] = fi0;
        }

        ++number_of_considered_element_pairs;
        if (progress_dots && !(number_of_considered_element_pairs % progress_dots))
        {
          std::cout << '+' << std::flush;
        }

      }
      best_companion_quality_1[fi1] = best_quality;
    }

    // And if both like each other, they become a pair.
    // element_pairs_->clear();
    for (UInt fi0 = 0; fi0 < input_maps[0].size(); ++fi0)
    {
      // fi0 likes someone ...
      if (best_companion_quality_0[fi0] > pair_min_quality_)
      {
        // ... who likes him too ...
        UInt best_companion_of_fi0 = best_companion_index_0[fi0];
        if (best_companion_index_1[best_companion_of_fi0] == fi0 &&
            best_companion_quality_1[best_companion_of_fi0] > pair_min_quality_
            )
        {
          ConsensusFeature f;
          f.insert(input_maps[0][fi0]);
          f.insert(input_maps[1][best_companion_of_fi0]);
          f.computeConsensus();
          f.setQuality(best_companion_quality_0[fi0] + best_companion_quality_1[best_companion_of_fi0]);
          result_map.push_back(f);
        }
      }
    }
    return;
  }
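A hedged usage sketch for SimplePairFinder::run() (not part of the original source): the wrapper name is hypothetical, and the two input maps are assumed to be filled and to carry valid unique ids (checkIds_() is called inside run()).

  void pairTwoMaps(const ConsensusMap& map_a, const ConsensusMap& map_b,
                   ConsensusMap& pairs)
  {
    std::vector<ConsensusMap> input(2);
    input[0] = map_a;
    input[1] = map_b;

    SimplePairFinder finder;   // default parameters; adjust via setParameters() if needed
    finder.run(input, pairs);  // throws IllegalArgument unless exactly two maps are given
  }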
  /// @brief extracts the iTRAQ channels from the MS data and stores intensity values in a consensus map
  ///
  /// @param ms_exp_data Raw data to read
  /// @param consensus_map Output map in which each MS² scan is stored as one consensus feature
  /// @throws Exception::MissingInformation if no scans are present or an MS² scan has no precursor
  void ItraqChannelExtractor::run(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map)
  {
    if (ms_exp_data.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.";
      throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!");
    }

    MSExperiment<> ms_exp_MS2;

    String mode = (String) param_.getValue("select_activation");
    std::cout << "Selecting scans with activation mode: " << (mode == "" ? "any" : mode) << "\n";
    HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(ListUtils::create<String>(mode));

    for (size_t idx = 0; idx < ms_exp_data.size(); ++idx)
    {
      if (ms_exp_data[idx].getMSLevel() == 2)
      {
        if (mode == "" || activation_predicate(ms_exp_data[idx]))
        {
          // copy only MS² scans
          ms_exp_MS2.addSpectrum(ms_exp_data[idx]);
        }
        else
        {
          //std::cout << "deleting spectrum # " << idx << " with RT: " << ms_exp_data[idx].getRT() << "\n";
        }
      }
    }

#ifdef ITRAQ_DEBUG
    std::cout << "we have " << ms_exp_MS2.size() << " scans left of level " << ms_exp_MS2[0].getMSLevel() << std::endl;
    std::cout << "run: channel_map_ has " << channel_map_.size() << " entries!" << std::endl;
#endif
    consensus_map.clear(false);
    // set <mapList> header
    Int index_cnt = 0;
    for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it)
    {
      // structure of Map cm_it
      //  first == channel-name as Int e.g. 114
      //  second == ChannelInfo struct
      ConsensusMap::FileDescription channel_as_map;
      // label is the channel + description provided in the Params
      if (itraq_type_ != TMT_SIXPLEX)
        channel_as_map.label = "iTRAQ_" + String(cm_it->second.name) + "_" + String(cm_it->second.description);
      else
        channel_as_map.label = "TMT_" + String(cm_it->second.name) + "_" + String(cm_it->second.description);

      channel_as_map.size = ms_exp_MS2.size();
      //TODO what about .filename? leave empty?
      // add some more MetaInfo
      channel_as_map.setMetaValue("channel_name", cm_it->second.name);
      channel_as_map.setMetaValue("channel_id", cm_it->second.id);
      channel_as_map.setMetaValue("channel_description", cm_it->second.description);
      channel_as_map.setMetaValue("channel_center", cm_it->second.center);
      channel_as_map.setMetaValue("channel_active", String(cm_it->second.active ? "true" : "false"));
      consensus_map.getFileDescriptions()[index_cnt++] = channel_as_map;
    }

    // create consensusElements

    Peak2D::CoordinateType allowed_deviation = (Peak2D::CoordinateType) param_.getValue("reporter_mass_shift");
    // now we have picked data
    // --> assign peaks to channels
    UInt element_index(0);

    for (MSExperiment<>::ConstIterator it = ms_exp_MS2.begin(); it != ms_exp_MS2.end(); ++it)
    {
      // store RT&MZ of parent ion as centroid of ConsensusFeature
      ConsensusFeature cf;
      cf.setUniqueId();
      cf.setRT(it->getRT());
      if (it->getPrecursors().size() >= 1)
      {
        cf.setMZ(it->getPrecursors()[0].getMZ());
      }
      else
      {
        throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + String(it->getNativeID()) + " with RT " + String(it->getRT()));
      }

      Peak2D channel_value;
      channel_value.setRT(it->getRT());
      // for each channel
      Int index = 0;
      Peak2D::IntensityType overall_intensity = 0;
      for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it)
      {
        // set mz-position of channel
        channel_value.setMZ(cm_it->second.center);
        // reset intensity
        channel_value.setIntensity(0);

        //add up all signals
        for (MSExperiment<>::SpectrumType::ConstIterator mz_it =
               it->MZBegin(cm_it->second.center - allowed_deviation)
             ; mz_it != it->MZEnd(cm_it->second.center + allowed_deviation)
             ; ++mz_it
             )
        {
          channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity());
        }

        overall_intensity += channel_value.getIntensity();

        // add channel to ConsensusFeature
        cf.insert(index++, channel_value, element_index);

      } // ! channel_iterator


      // check featureHandles are not empty
      if (overall_intensity == 0)
      {
        cf.setMetaValue("all_empty", String("true"));
      }
      cf.setIntensity(overall_intensity);
      consensus_map.push_back(cf);

      // count the tandem scans in the order they appear in the experiment
      ++element_index;
    } // ! Experiment iterator


#ifdef ITRAQ_DEBUG
    std::cout << "processed " << element_index << " scans" << std::endl;
#endif

    consensus_map.setExperimentType("itraq");

    return;
  }
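After run(), every iTRAQ/TMT channel is registered as one FileDescription of the output map, and a channel's map index equals the key used with cf.insert(index, ...) above. A hedged sketch for inspecting that header (the function name is hypothetical):

  void printChannelHeaders(const ConsensusMap& consensus_map)
  {
    for (ConsensusMap::FileDescriptions::const_iterator it = consensus_map.getFileDescriptions().begin();
         it != consensus_map.getFileDescriptions().end(); ++it)
    {
      std::cout << "map index " << it->first << " -> " << it->second.label << std::endl;
    }
  }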
Example No. 7
void BaseLabeler::recomputeConsensus_(const FeatureMapSim & simulated_features)
{
    // iterate over all given features stored in the labeling consensus and try to find the corresponding feature
    // in the feature map

    // build index for faster access
    Map<String, IntList> id_map;
    Map<UInt64, Size> features_per_labeled_map;
    for (Size i = 0; i < simulated_features.size(); ++i)
    {
        if (simulated_features[i].metaValueExists("parent_feature"))
        {
            LOG_DEBUG << "Checking [" << i << "]: " << simulated_features[i].getPeptideIdentifications()[0].getHits()[0].getSequence().toString()
                      << " with charge " << simulated_features[i].getCharge() << " (" << simulated_features[i].getMetaValue("charge_adducts") << ")"
                      << " parent was " << simulated_features[i].getMetaValue("parent_feature") << std::endl;
            id_map[simulated_features[i].getMetaValue("parent_feature")].push_back((Int)i);

            UInt64 map_index = 0;
            if (simulated_features[i].metaValueExists("map_index"))
            {
                map_index = simulated_features[i].getMetaValue("map_index");
            }
            ++features_per_labeled_map[map_index];
        }
    }

    for (Map<String, IntList>::iterator it = id_map.begin(); it != id_map.end(); ++it)
    {
        LOG_DEBUG << it->first << " " << it->second << std::endl;
    }

    // new consensus map
    ConsensusMap new_cm;

    // initialize submaps in consensus map
    for (Map<UInt64, Size>::Iterator it = features_per_labeled_map.begin(); it != features_per_labeled_map.end(); ++it)
    {
        new_cm.getFileDescriptions()[it->first].size = it->second;
        new_cm.getFileDescriptions()[it->first].unique_id = simulated_features.getUniqueId();
    }

    for (ConsensusMap::iterator cm_iter = consensus_.begin(); cm_iter != consensus_.end(); ++cm_iter)
    {
        bool complete = true;

        LOG_DEBUG << "Checking consensus feature containing: " << std::endl;

        // check if we have all elements of current CF in the new feature map (simulated_features)
        for (ConsensusFeature::iterator cf_iter = (*cm_iter).begin(); cf_iter != (*cm_iter).end(); ++cf_iter)
        {
            complete &= id_map.has(String((*cf_iter).getUniqueId()));
            LOG_DEBUG << "\t" << String((*cf_iter).getUniqueId()) << std::endl;
        }

        if (complete)
        {
            // get all elements sorted by charge state; since the same charge can be achieved by different
            // adduct compositions we use the adduct-string as indicator to find the groups
            Map<String, std::set<FeatureHandle, FeatureHandle::IndexLess> > charge_mapping;

            for (ConsensusFeature::iterator cf_iter = (*cm_iter).begin(); cf_iter != (*cm_iter).end(); ++cf_iter)
            {
                IntList feature_indices = id_map[String((*cf_iter).getUniqueId())];

                for (IntList::iterator it = feature_indices.begin(); it != feature_indices.end(); ++it)
                {
                    UInt64 map_index = 0;
                    if (simulated_features[*it].metaValueExists("map_index"))
                    {
                        map_index = simulated_features[*it].getMetaValue("map_index");
                    }

                    if (charge_mapping.has(simulated_features[*it].getMetaValue("charge_adducts")))
                    {
                        charge_mapping[simulated_features[*it].getMetaValue("charge_adducts")].insert(FeatureHandle(map_index, simulated_features[*it]));
                    }
                    else
                    {
                        LOG_DEBUG << "Create new set with charge composition " << simulated_features[*it].getMetaValue("charge_adducts") << std::endl;
                        std::set<FeatureHandle, FeatureHandle::IndexLess> fh_set;

                        fh_set.insert(FeatureHandle(map_index, simulated_features[*it]));
                        charge_mapping.insert(std::make_pair(simulated_features[*it].getMetaValue("charge_adducts"), fh_set));
                    }
                }
            }

            // create new consensus feature from derived features (separated by charge, if charge != 0)
            for (Map<String, std::set<FeatureHandle, FeatureHandle::IndexLess> >::const_iterator charge_group_it = charge_mapping.begin();
                    charge_group_it != charge_mapping.end();
                    ++charge_group_it)
            {
                ConsensusFeature cf;
                cf.setCharge((*(*charge_group_it).second.begin()).getCharge());
                cf.setMetaValue("charge_adducts", charge_group_it->first);

                std::vector<PeptideIdentification> ids;
                for (std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fh_it = (charge_group_it->second).begin(); fh_it != (charge_group_it->second).end(); ++fh_it)
                {
                    cf.insert(*fh_it);
                    // append identifications
                    Size f_index = simulated_features.uniqueIdToIndex(fh_it->getUniqueId());
                    std::vector<PeptideIdentification> ids_feature = simulated_features[f_index].getPeptideIdentifications();
                    ids.insert(ids.end(), ids_feature.begin(), ids_feature.end());
                }

                cf.computeMonoisotopicConsensus();
                cf.setPeptideIdentifications(ids);

                new_cm.push_back(cf);
            }

        }
    }

    new_cm.setProteinIdentifications(simulated_features.getProteinIdentifications());

    consensus_.swap(new_cm);
    consensus_.applyMemberFunction(&UniqueIdInterface::ensureUniqueId);
}
Example No. 8
void SILACLabeler::postDigestHook(SimTypes::FeatureMapSimVector& features_to_simulate)
{

    SimTypes::FeatureMapSim& light_channel_features = features_to_simulate[0];
    SimTypes::FeatureMapSim& medium_channel_features = features_to_simulate[1];

    // merge the generated feature maps and create consensus
    SimTypes::FeatureMapSim final_feature_map = mergeProteinIdentificationsMaps_(features_to_simulate);

    if (features_to_simulate.size() == 2)
    {
        Map<String, Feature> unlabeled_features_index;
        for (SimTypes::FeatureMapSim::iterator unlabeled_features_iter = light_channel_features.begin();
                unlabeled_features_iter != light_channel_features.end();
                ++unlabeled_features_iter)
        {
            (*unlabeled_features_iter).ensureUniqueId();
            unlabeled_features_index.insert(std::make_pair(
                                                (*unlabeled_features_iter).getPeptideIdentifications()[0].getHits()[0].getSequence().toString()
                                                ,
                                                *unlabeled_features_iter
                                            ));
        }

        // iterate over second map
        for (SimTypes::FeatureMapSim::iterator labeled_feature_iter = medium_channel_features.begin(); labeled_feature_iter != medium_channel_features.end(); ++labeled_feature_iter)
        {
            const String unmodified_sequence = getUnmodifiedSequence_(*labeled_feature_iter, medium_channel_arginine_label_, medium_channel_lysine_label_);

            // guarantee uniqueness
            (*labeled_feature_iter).ensureUniqueId();

            // check if we have a pair
            if (unlabeled_features_index.has(unmodified_sequence))
            {
                // own scope as we don't know what happens to 'unlabeled_feature' once we call erase() below
                Feature& unlabeled_feature = unlabeled_features_index[unmodified_sequence];
                // guarantee uniqueness
                unlabeled_feature.ensureUniqueId();

                // feature has a SILAC Label and is not equal to non-labeled
                if ((*labeled_feature_iter).getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
                {
                    // add features to final map
                    final_feature_map.push_back(*labeled_feature_iter);
                    final_feature_map.push_back(unlabeled_feature);

                    // create consensus feature
                    ConsensusFeature cf;
                    cf.insert(MEDIUM_FEATURE_MAPID_, *labeled_feature_iter);
                    cf.insert(LIGHT_FEATURE_MAPID_, unlabeled_feature);
                    cf.ensureUniqueId();
                    consensus_.push_back(cf);

                    // remove unlabeled feature
                    unlabeled_features_index.erase(unmodified_sequence);
                }
                else
                {
                    // merge features since they are equal
                    Feature final_feature = mergeFeatures_(*labeled_feature_iter, unmodified_sequence, unlabeled_features_index, 1, 2);
                    final_feature_map.push_back(final_feature);
                }
            }
            else // no SILAC pair, just add the labeled one
            {
                final_feature_map.push_back(*labeled_feature_iter);
            }
        }

        // add singletons from unlabeled channel
        // clean up unlabeled_index
        for (Map<String, Feature>::iterator unlabeled_index_iter = unlabeled_features_index.begin(); unlabeled_index_iter != unlabeled_features_index.end(); ++unlabeled_index_iter)
        {
            // the single ones from c0
            final_feature_map.push_back(unlabeled_index_iter->second);
        }
    }

    // merge three channels
    if (features_to_simulate.size() == 3)
    {

        // index of the unlabeled channel
        Map<String, Feature> unlabeled_features_index;
        for (SimTypes::FeatureMapSim::iterator unlabeled_features_iter = light_channel_features.begin();
                unlabeled_features_iter != light_channel_features.end();
                ++unlabeled_features_iter)
        {
            (*unlabeled_features_iter).ensureUniqueId();
            unlabeled_features_index.insert(std::make_pair(
                                                (*unlabeled_features_iter).getPeptideIdentifications()[0].getHits()[0].getSequence().toString()
                                                ,
                                                *unlabeled_features_iter
                                            ));
        }

        // index of labeled channel
        Map<String, Feature> medium_features_index;
        for (SimTypes::FeatureMapSim::iterator labeled_features_iter = medium_channel_features.begin();
                labeled_features_iter != medium_channel_features.end();
                ++labeled_features_iter)
        {
            (*labeled_features_iter).ensureUniqueId();
            medium_features_index.insert(std::make_pair(
                                             getUnmodifiedSequence_(*labeled_features_iter, medium_channel_arginine_label_, medium_channel_lysine_label_)
                                             ,
                                             *labeled_features_iter
                                         ));
        }

        SimTypes::FeatureMapSim& heavy_labeled_features = features_to_simulate[2];
        for (SimTypes::FeatureMapSim::iterator heavy_labeled_feature_iter = heavy_labeled_features.begin();
                heavy_labeled_feature_iter != heavy_labeled_features.end();
                ++heavy_labeled_feature_iter)
        {

            Feature& heavy_feature = *heavy_labeled_feature_iter;
            heavy_feature.ensureUniqueId();

            String heavy_feature_unmodified_sequence = getUnmodifiedSequence_(heavy_feature, heavy_channel_arginine_label_, heavy_channel_lysine_label_);

            if (unlabeled_features_index.has(heavy_feature_unmodified_sequence) && medium_features_index.has(heavy_feature_unmodified_sequence))
            {
                // it is a triplet
                // c2 & c1 modified
                if (heavy_feature_unmodified_sequence != heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toString())
                {
                    // add features to final map
                    final_feature_map.push_back(heavy_feature);
                    final_feature_map.push_back(medium_features_index[heavy_feature_unmodified_sequence]);
                    final_feature_map.push_back(unlabeled_features_index[heavy_feature_unmodified_sequence]);

                    ConsensusFeature c_triplet;
                    c_triplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
                    c_triplet.insert(LIGHT_FEATURE_MAPID_, unlabeled_features_index[heavy_feature_unmodified_sequence]);
                    c_triplet.insert(MEDIUM_FEATURE_MAPID_, medium_features_index[heavy_feature_unmodified_sequence]);
                    c_triplet.ensureUniqueId();

                    consensus_.push_back(c_triplet);
                }
                else
                {
                    // merge all three channels
                    Feature completeMerge = mergeAllChannelFeatures_(heavy_feature, heavy_feature_unmodified_sequence, unlabeled_features_index, medium_features_index);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                unlabeled_features_index.erase(heavy_feature_unmodified_sequence);
                medium_features_index.erase(heavy_feature_unmodified_sequence);
            }
            else if (unlabeled_features_index.has(heavy_feature_unmodified_sequence))
            {
                // 2nd case light and heavy pair
                if (heavy_feature_unmodified_sequence != heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toString())
                {
                    // add features to final map
                    final_feature_map.push_back(heavy_feature);
                    final_feature_map.push_back(unlabeled_features_index[heavy_feature_unmodified_sequence]);

                    ConsensusFeature c_duplet;
                    c_duplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
                    c_duplet.insert(LIGHT_FEATURE_MAPID_, unlabeled_features_index[heavy_feature_unmodified_sequence]);
                    c_duplet.ensureUniqueId();

                    consensus_.push_back(c_duplet);
                }
                else
                {
                    // merge light and heavy channels since they are equal
                    Feature completeMerge = mergeFeatures_(heavy_feature, heavy_feature_unmodified_sequence, unlabeled_features_index, 1, 3);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                unlabeled_features_index.erase(heavy_feature_unmodified_sequence);
            }
            else if (medium_features_index.has(heavy_feature_unmodified_sequence))
            {
                // 3rd case medium and heavy pair
                if (heavy_feature_unmodified_sequence != heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toString())
                {
                    // add features to final map
                    final_feature_map.push_back(heavy_feature);
                    final_feature_map.push_back(medium_features_index[heavy_feature_unmodified_sequence]);

                    ConsensusFeature c_duplet;
                    c_duplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
                    c_duplet.insert(MEDIUM_FEATURE_MAPID_, medium_features_index[heavy_feature_unmodified_sequence]);
                    c_duplet.ensureUniqueId();

                    consensus_.push_back(c_duplet);
                }
                else
                {
                    // merge medium and heavy channels since they are equal
                    Feature completeMerge = mergeFeatures_(heavy_feature, heavy_feature_unmodified_sequence, medium_features_index, 2, 3);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                medium_features_index.erase(heavy_feature_unmodified_sequence);
            }
            else
            {
                // heavy feature is a singleton
                final_feature_map.push_back(heavy_feature);
            }
        }

        // clean up labeled_index
        for (Map<String, Feature>::iterator medium_channel_index_iterator = medium_features_index.begin(); medium_channel_index_iterator != medium_features_index.end(); ++medium_channel_index_iterator)
        {
            Feature& medium_channel_feature = medium_channel_index_iterator->second;
            medium_channel_feature.ensureUniqueId();

            String medium_channel_feature_unmodified_sequence = getUnmodifiedSequence_(medium_channel_feature, medium_channel_arginine_label_, medium_channel_lysine_label_);

            if (unlabeled_features_index.has(medium_channel_feature_unmodified_sequence))
            {
                // 1. case: pair between c0 and c1
                if (medium_channel_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
                {
                    // add features to final map
                    final_feature_map.push_back(medium_channel_feature);
                    final_feature_map.push_back(unlabeled_features_index[medium_channel_feature_unmodified_sequence]);

                    ConsensusFeature c_duplet;
                    c_duplet.insert(MEDIUM_FEATURE_MAPID_, medium_channel_feature);
                    c_duplet.insert(LIGHT_FEATURE_MAPID_, unlabeled_features_index[medium_channel_feature_unmodified_sequence]);
                    c_duplet.ensureUniqueId();
                    consensus_.push_back(c_duplet);
                }
                else
                {
                    // merge
                    Feature completeMerge = mergeFeatures_(medium_channel_feature, medium_channel_feature_unmodified_sequence, unlabeled_features_index, 1, 2);
                    final_feature_map.push_back(completeMerge);
                }
                // remove features from indices
                unlabeled_features_index.erase(medium_channel_feature_unmodified_sequence);
            }
            else
            {
                // c1 is alone
                final_feature_map.push_back(medium_channel_feature);
            }

        }

        // clean up unlabeled_index
        for (Map<String, Feature>::iterator unlabeled_index_iter = unlabeled_features_index.begin(); unlabeled_index_iter != unlabeled_features_index.end(); ++unlabeled_index_iter)
        {
            // the single ones from c0
            final_feature_map.push_back(unlabeled_index_iter->second);
        }
    }

    features_to_simulate.clear();
    features_to_simulate.push_back(final_feature_map);

    consensus_.setProteinIdentifications(final_feature_map.getProteinIdentifications());
    ConsensusMap::FileDescription map_description;
    map_description.label = "Simulation (Labeling Consensus)";
    map_description.size = features_to_simulate.size();
    consensus_.getFileDescriptions()[0] = map_description;
}
Example No. 9
  void EDTAFile::load(const String& filename, ConsensusMap& consensus_map)
  {
    // load input
    TextFile input(filename);
    TextFile::ConstIterator input_it = input.begin();

    // reset map
    consensus_map = ConsensusMap();
    consensus_map.setUniqueId();

    char separator = ' ';
    if (input_it->hasSubstring("\t"))
      separator = '\t';
    else if (input_it->hasSubstring(" "))
      separator = ' ';
    else if (input_it->hasSubstring(","))
      separator = ',';

    // parsing header line
    std::vector<String> headers;
    input_it->split(separator, headers);
    int offset = 0;
    for (Size i = 0; i < headers.size(); ++i)
    {
      headers[i].trim();
    }
    String header_trimmed = *input.begin();
    header_trimmed.trim();

    enum
    {
      TYPE_UNDEFINED,
      TYPE_OLD_NOCHARGE,
      TYPE_OLD_CHARGE,
      TYPE_CONSENSUS
    }
    input_type = TYPE_UNDEFINED;
    Size input_features = 1;

    double rt = 0.0;
    double mz = 0.0;
    double it = 0.0;
    Int ch = 0;

    if (headers.size() <= 2)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "'  (line 1)\n");
    }
    else if (headers.size() == 3)
      input_type = TYPE_OLD_NOCHARGE;
    else if (headers.size() == 4)
      input_type = TYPE_OLD_CHARGE;

    // see if we have a header
    try
    {
      // try to convert; if it fails, that's a header line
      rt = headers[0].toDouble();
      mz = headers[1].toDouble();
      it = headers[2].toDouble();
    }
    catch (Exception::BaseException&)
    {
      offset = 1;
      ++input_it;
      LOG_INFO << "Detected a header line.\n";
    }

    if (headers.size() >= 5)
    {
      if (String(headers[4].trim()).toUpper() == "RT1")
        input_type = TYPE_CONSENSUS;
      else
        input_type = TYPE_OLD_CHARGE;
    }
    if (input_type == TYPE_CONSENSUS)
    {
      // Every consensus-style line stores one feature per group of four columns;
      // the remaining columns are meta data.
      input_features = headers.size() / 4;
    }

    if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS))
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "'  (line 1)\n");
    }

    SignedSize input_size = input.end() - input.begin();

    ConsensusMap::FileDescription desc;
    desc.filename = filename;
    desc.size = (input_size) - offset;
    consensus_map.getFileDescriptions()[0] = desc;

    // parsing features
    consensus_map.reserve(input_size);

    for (; input_it != input.end(); ++input_it)
    {
      //do nothing for empty lines
      String line_trimmed = *input_it;
      line_trimmed.trim();
      if (line_trimmed == "")
      {
        if ((input_it - input.begin()) < input_size - 1) LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ").";
        continue;
      }

      //split line to tokens
      std::vector<String> parts;
      input_it->split(separator, parts);

      //abort if line does not contain enough fields
      if (parts.size() < 3)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                    String("Failed parsing in line ")
                                    + String((input_it - input.begin()) + 1)
                                    + ": At least three columns are needed! (got  "
                                    + String(parts.size())
                                    + ")\nOffending line: '"
                                    + line_trimmed
                                    + "'  (line "
                                    + String((input_it - input.begin()) + 1)
                                    + ")\n");
      }

      ConsensusFeature cf;
      cf.setUniqueId();

      try
      {
        // Convert values. Will return -1 if not available.
        rt = checkedToDouble_(parts, 0);
        mz = checkedToDouble_(parts, 1);
        it = checkedToDouble_(parts, 2);
        ch = checkedToInt_(parts, 3);

        cf.setRT(rt);
        cf.setMZ(mz);
        cf.setIntensity(it);
        if (input_type != TYPE_OLD_NOCHARGE)
          cf.setCharge(ch);
      }
      catch (Exception::BaseException&)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "'  (line " + String((input_it - input.begin()) + 1) + ")\n");
      }

      // Check all features in one line
      for (Size j = 1; j < input_features; ++j)
      {
        try
        {
          Feature f;
          f.setUniqueId();

          // Convert values. Will return -1 if not available.
          rt = checkedToDouble_(parts, j * 4 + 0);
          mz = checkedToDouble_(parts, j * 4 + 1);
          it = checkedToDouble_(parts, j * 4 + 2);
          ch = checkedToInt_(parts, j * 4 + 3);

          // Only accept features with at least RT and MZ set
          if (rt != -1 && mz != -1)
          {
            f.setRT(rt);
            f.setMZ(mz);
            f.setIntensity(it);
            f.setCharge(ch);

            cf.insert(j - 1, f);
          }
        }
        catch (Exception::BaseException&)
        {
          throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1) + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "'  (line " + String((input_it - input.begin()) + 1) + ")\n");
        }
      }

      //parse meta data
      for (Size j = input_features * 4; j < parts.size(); ++j)
      {
        String part_trimmed = parts[j];
        part_trimmed.trim();
        if (part_trimmed != "")
        {
          //check if column name is ok
          if (headers.size() <= j || headers[j] == "")
          {
            throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                        String("Error: Missing meta data header for column ") + (j + 1) + "!"
                                        + String("Offending header line: '") + header_trimmed + "'  (line 1)");
          }
          //add meta value
          cf.setMetaValue(headers[j], part_trimmed);
        }
      }

      //insert feature to map
      consensus_map.push_back(cf);
    }

    // register FileDescriptions
    ConsensusMap::FileDescription fd;
    fd.filename = filename;
    fd.size = consensus_map.size();
      Size maps = std::max(input_features - 1, Size(1)); // it's either a simple feature or a consensus map
    // (in this case the 'input_features' includes the centroid, which we do not count)
    for (Size i = 0; i < maps; ++i)
    {
      fd.label = String("EDTA_Map ") + String(i);
      consensus_map.getFileDescriptions()[i] = fd;
    }

  }
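A hedged usage sketch for EDTAFile::load() (not part of the original source): the function name and the input file name are placeholders.

  void loadEdtaExample()
  {
    ConsensusMap cm;
    EDTAFile edta;
    edta.load("my_features.edta", cm); // hypothetical input file
    LOG_INFO << "Loaded " << cm.size() << " consensus features." << std::endl;
  }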
Example No. 10
  void QTClusterFinder::makeConsensusFeature_(list<QTCluster> & clustering,
           ConsensusFeature & feature, OpenMSBoost::unordered_map<GridFeature *,
             std::vector< QTCluster * > > & element_mapping)
  {
    // find the best cluster (a valid cluster with the highest score)
    list<QTCluster>::iterator best = clustering.begin();
    while (best != clustering.end() && best->isInvalid()) {++best;}
    for (list<QTCluster>::iterator it = best;
         it != clustering.end(); ++it)
    {
      if (!it->isInvalid())
      {
        if (it->getQuality() > best->getQuality())
        {
          best = it;
        }
      }
    }

    // no more clusters to process -> clear clustering and return
    if (best == clustering.end())
    {
      clustering.clear();
      return;
    }

    OpenMSBoost::unordered_map<Size, GridFeature *> elements;
    best->getElements(elements);
    // cout << "Elements: " << elements.size() << " with best " << best->getQuality() << " invalid " << best->isInvalid() << endl;

    // create consensus feature from best cluster:
    feature.setQuality(best->getQuality());
    for (OpenMSBoost::unordered_map<Size, GridFeature *>::const_iterator it = elements.begin();
         it != elements.end(); ++it)
    {
      feature.insert(it->first, it->second->getFeature());
    }
    feature.computeConsensus();


 
    // update the clustering:
    // 1. remove current "best" cluster
    // 2. update all clusters accordingly and invalidate elements whose central
    //    element is removed
    best->setInvalid();
    for (OpenMSBoost::unordered_map<Size, GridFeature *>::const_iterator it = elements.begin();
         it != elements.end(); ++it)
    {
      for (std::vector< QTCluster* >::iterator 
            cluster  = element_mapping[&(*it->second)].begin();
            cluster != element_mapping[&(*it->second)].end(); ++cluster)
      {
        // we do not want to update invalid features (saves time and does not
        // recompute the quality)
        if (!(*cluster)->isInvalid())
        {
          if (!(*cluster)->update(elements))       // cluster is invalid (center point removed):
          {
            (*cluster)->setInvalid();
          }
        }
      }
    }
  }
Example No. 11
  void ICPLLabeler::postDigestHook(SimTypes::FeatureMapSimVector& features_to_simulate)
  {
    SimTypes::FeatureMapSim& light_labeled_features = features_to_simulate[0];
    SimTypes::FeatureMapSim& medium_labeled_features = features_to_simulate[1];

    if (param_.getValue("label_proteins") == "false") // loop for peptide-labeling (post-digest-labeling)
    {
      // iterate over first map for light labeling
      for (SimTypes::FeatureMapSim::iterator lf_iter = light_labeled_features.begin(); lf_iter != light_labeled_features.end(); ++lf_iter)
      {
        lf_iter->ensureUniqueId();
        addModificationToPeptideHit_(*lf_iter, light_channel_label_);
      }

      // iterate over second map for medium labeling
      for (SimTypes::FeatureMapSim::iterator lf_iter = medium_labeled_features.begin(); lf_iter != medium_labeled_features.end(); ++lf_iter)
      {
        lf_iter->ensureUniqueId();
        addModificationToPeptideHit_(*lf_iter, medium_channel_label_);
      }

      if (features_to_simulate.size() == 3) // third-channel labeling can only be done if a third channel exists
      {
        SimTypes::FeatureMapSim& heavy_labeled_features = features_to_simulate[2];

        // iterate over third map
        for (SimTypes::FeatureMapSim::iterator lf_iter = heavy_labeled_features.begin(); lf_iter != heavy_labeled_features.end(); ++lf_iter)
        {
          lf_iter->ensureUniqueId();
          addModificationToPeptideHit_(*lf_iter, heavy_channel_label_);
        }
      }
    }

    // merge the generated feature maps and create consensus
    SimTypes::FeatureMapSim final_feature_map = mergeProteinIdentificationsMaps_(features_to_simulate);

    if (features_to_simulate.size() == 2) // merge_modus for two FeatureMaps
    {
      // create index of light channel features for easy mapping of medium-to-light channel
      Map<String, Feature> light_labeled_features_index;
      for (SimTypes::FeatureMapSim::iterator light_labeled_features_iter = light_labeled_features.begin();
           light_labeled_features_iter != light_labeled_features.end();
           ++light_labeled_features_iter)
      {
        (*light_labeled_features_iter).ensureUniqueId();
        light_labeled_features_index.insert(std::make_pair(
                                              getUnmodifiedAASequence_((*light_labeled_features_iter), light_channel_label_),
                                              *light_labeled_features_iter
                                              ));
      }

      // iterate over second map
      for (SimTypes::FeatureMapSim::iterator medium_labeled_feature_iter = medium_labeled_features.begin(); medium_labeled_feature_iter != medium_labeled_features.end(); ++medium_labeled_feature_iter)
      {
        AASequence medium_labeled_feature_sequence = (*medium_labeled_feature_iter).getPeptideIdentifications()[0].getHits()[0].getSequence();

        // guarantee uniqueness
        (*medium_labeled_feature_iter).ensureUniqueId();

        // check if we have a pair
        if (light_labeled_features_index.has(getUnmodifiedAASequence_((*medium_labeled_feature_iter), medium_channel_label_)))
        {
          // own scope as we don't know what happens to 'light_labeled_feature' once we call erase() below
          Feature& light_labeled_feature = light_labeled_features_index[getUnmodifiedAASequence_((*medium_labeled_feature_iter), medium_channel_label_)];
          // guarantee uniqueness
          light_labeled_feature.ensureUniqueId();

          if (medium_labeled_feature_sequence.isModified()) // feature has a medium ICPL-Label and is not equal to light-labeled
          {
            // add features to final map
            final_feature_map.push_back(*medium_labeled_feature_iter);
            final_feature_map.push_back(light_labeled_feature);

            // create consensus feature
            ConsensusFeature cf;
            cf.insert(MEDIUM_FEATURE_MAPID_, *medium_labeled_feature_iter);
            cf.insert(LIGHT_FEATURE_MAPID_, light_labeled_feature);

            consensus_.push_back(cf);

            // remove light-labeled feature
            light_labeled_features_index.erase(getUnmodifiedAASequence_((*medium_labeled_feature_iter), medium_channel_label_));
          }
          else
          {
            // merge features since they are equal
            Feature final_feature = mergeFeatures_(*medium_labeled_feature_iter, medium_labeled_feature_sequence, light_labeled_features_index);
            final_feature_map.push_back(final_feature);
          }
        }
        else // no ICPL pair, just add the medium-labeled one
        {
          final_feature_map.push_back(*medium_labeled_feature_iter);
        }
      }

      // add the light-labeled features that remained unpaired (singletons from c0)
      for (Map<String, Feature>::iterator light_labeled_index_iter = light_labeled_features_index.begin(); light_labeled_index_iter != light_labeled_features_index.end(); ++light_labeled_index_iter)
      {
        // the single ones from c0
        final_feature_map.push_back(light_labeled_index_iter->second);
      }
    }
    else if (features_to_simulate.size() == 3) // merge mode for three channels
    {
      // create index of light channel features for easy mapping of heavy-to-medium-to-light channel
      Map<String, Feature> light_labeled_features_index;
      for (SimTypes::FeatureMapSim::iterator light_labeled_features_iter = light_labeled_features.begin();
           light_labeled_features_iter != light_labeled_features.end();
           ++light_labeled_features_iter)
      {
        (*light_labeled_features_iter).ensureUniqueId();
        light_labeled_features_index.insert(std::make_pair(
                                              getUnmodifiedAASequence_(*light_labeled_features_iter, light_channel_label_),
                                              *light_labeled_features_iter
                                              ));
      }

      // create index of medium channel features for easy mapping of heavy-to-medium-to-light channel
      Map<String, Feature> medium_labeled_features_index;
      for (SimTypes::FeatureMapSim::iterator medium_labeled_features_iter = medium_labeled_features.begin();
           medium_labeled_features_iter != medium_labeled_features.end();
           ++medium_labeled_features_iter)
      {
        (*medium_labeled_features_iter).ensureUniqueId();
        medium_labeled_features_index.insert(std::make_pair(
                                               getUnmodifiedAASequence_((*medium_labeled_features_iter), medium_channel_label_),
                                               *medium_labeled_features_iter
                                               ));
      }

      for (SimTypes::FeatureMapSim::iterator heavy_labeled_feature_iter = features_to_simulate[2].begin(); heavy_labeled_feature_iter != features_to_simulate[2].end(); ++heavy_labeled_feature_iter)
      {
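        // match each heavy-labeled feature against the light and medium indices by
        // its unmodified sequence; depending on which channels hold a partner, a
        // triplet, a pair, or a singleton is produced, and matched index entries are
        // removed so that only unpaired features remain for the cleanup loops below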
        Feature& heavy_feature = *heavy_labeled_feature_iter;
        String heavy_feature_unmodified_sequence = getUnmodifiedAASequence_(heavy_feature, heavy_channel_label_);
        heavy_feature.ensureUniqueId();

        if (light_labeled_features_index.has(heavy_feature_unmodified_sequence) && medium_labeled_features_index.has(heavy_feature_unmodified_sequence))
        {
          // 1st case: it is a triplet
          if (heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // if the heavy feature is modified, the light and medium channel features are modified as well

            // add features to final map
            final_feature_map.push_back(heavy_feature);
            final_feature_map.push_back(medium_labeled_features_index[heavy_feature_unmodified_sequence]);
            final_feature_map.push_back(light_labeled_features_index[heavy_feature_unmodified_sequence]);

            // create triplet consensus feature
            ConsensusFeature c_triplet;
            c_triplet.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
            c_triplet.insert(LIGHT_FEATURE_MAPID_, light_labeled_features_index[heavy_feature_unmodified_sequence]);
            c_triplet.insert(MEDIUM_FEATURE_MAPID_, medium_labeled_features_index[heavy_feature_unmodified_sequence]);

            consensus_.push_back(c_triplet);
          }
          else
          {
            // merge all three channels
            Feature c2c1 = mergeFeatures_(heavy_feature, AASequence::fromString(heavy_feature_unmodified_sequence), medium_labeled_features_index);
            Feature completeMerge = mergeFeatures_(c2c1, AASequence::fromString(heavy_feature_unmodified_sequence), light_labeled_features_index);

            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          light_labeled_features_index.erase(heavy_feature_unmodified_sequence);
          medium_labeled_features_index.erase(heavy_feature_unmodified_sequence);
        }
        else if (light_labeled_features_index.has(heavy_feature_unmodified_sequence))
        {
          // 2nd case: pair of c0 and c2 (no medium-labeled partner)
          if (heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // add features to final map
            final_feature_map.push_back(heavy_feature);
            final_feature_map.push_back(light_labeled_features_index[heavy_feature_unmodified_sequence]);

            ConsensusFeature c_pair;
            c_pair.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
            c_pair.insert(LIGHT_FEATURE_MAPID_, light_labeled_features_index[heavy_feature_unmodified_sequence]);

            consensus_.push_back(c_pair);
          }
          else
          {
            // merge the light and heavy channel (c0 and c2)
            Feature completeMerge = mergeFeatures_(heavy_feature, AASequence::fromString(heavy_feature_unmodified_sequence), light_labeled_features_index);
            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          light_labeled_features_index.erase(heavy_feature_unmodified_sequence);
        }
        else if (medium_labeled_features_index.has(heavy_feature_unmodified_sequence))
        {
          // 3rd case: pair of c1 and c2 (no light-labeled partner)
          if (heavy_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // add features to final map
            final_feature_map.push_back(heavy_feature);
            final_feature_map.push_back(medium_labeled_features_index[heavy_feature_unmodified_sequence]);

            ConsensusFeature c_pair;
            c_pair.insert(HEAVY_FEATURE_MAPID_, heavy_feature);
            c_pair.insert(MEDIUM_FEATURE_MAPID_, medium_labeled_features_index[heavy_feature_unmodified_sequence]);

            consensus_.push_back(c_pair);
          }
          else
          {
            // merge the medium and heavy channel (c1 and c2)
            Feature completeMerge = mergeFeatures_(heavy_feature, AASequence::fromString(heavy_feature_unmodified_sequence), medium_labeled_features_index);
            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          medium_labeled_features_index.erase(heavy_feature_unmodified_sequence);
        }
        else
        {
          // 4th case: the heavy-labeled feature has no partner (singleton)
          final_feature_map.push_back(heavy_feature);
        }
      }

      // process the medium-labeled features that found no heavy partner:
      // pair them with a light-labeled counterpart or add them as singletons
      for (Map<String, Feature>::iterator medium_labeled_index_iter = medium_labeled_features_index.begin(); medium_labeled_index_iter != medium_labeled_features_index.end(); ++medium_labeled_index_iter)
      {
        Feature& medium_labeled_feature = medium_labeled_index_iter->second;
        medium_labeled_feature.ensureUniqueId();

        String medium_labeled_feature_unmodified_sequence = medium_labeled_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString();

        if (light_labeled_features_index.has(medium_labeled_feature_unmodified_sequence))
        {
          // 1st case: pair between c0 and c1
          if (medium_labeled_feature.getPeptideIdentifications()[0].getHits()[0].getSequence().isModified())
          {
            // add features to final map
            final_feature_map.push_back(medium_labeled_feature);
            final_feature_map.push_back(light_labeled_features_index[medium_labeled_feature_unmodified_sequence]);

            ConsensusFeature c_pair;
            c_pair.insert(MEDIUM_FEATURE_MAPID_, medium_labeled_feature);
            c_pair.insert(LIGHT_FEATURE_MAPID_, light_labeled_features_index[medium_labeled_feature_unmodified_sequence]);

            consensus_.push_back(c_pair);
          }
          else
          {
            // merge
            Feature completeMerge = mergeFeatures_(medium_labeled_feature, AASequence::fromString(medium_labeled_feature_unmodified_sequence), light_labeled_features_index);
            final_feature_map.push_back(completeMerge);
          }
          // remove features from indices
          light_labeled_features_index.erase(medium_labeled_feature_unmodified_sequence);
        }
        else
        {
          // the medium-labeled feature (c1) is a singleton
          final_feature_map.push_back(medium_labeled_feature);
        }
      }

      // add the light-labeled features that remained unpaired (singletons from c0)
      for (Map<String, Feature>::iterator light_labeled_index_iter = light_labeled_features_index.begin(); light_labeled_index_iter != light_labeled_features_index.end(); ++light_labeled_index_iter)
      {
        // the single ones from c0
        final_feature_map.push_back(light_labeled_index_iter->second);
      }
    }

    features_to_simulate.clear();
    features_to_simulate.push_back(final_feature_map);

    consensus_.setProteinIdentifications(final_feature_map.getProteinIdentifications());
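    // register a file description for the consensus map that documents the
    // simulated labeling run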
    ConsensusMap::FileDescription map_description;
    map_description.label = "Simulation (Labeling Consensus)";
    map_description.size = features_to_simulate.size();
    consensus_.getFileDescriptions()[0] = map_description;
  }
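
The matching above keys each channel on the unmodified peptide sequence and consumes index entries once a partner has been found. Below is a minimal, self-contained sketch of that pairing idea using plain STL containers; ToyFeature and the hard-coded sequences are illustrative placeholders, not part of the OpenMS simulator.

// Minimal sketch (assumption: toy types and plain STL, not the OpenMS API).
#include <iostream>
#include <map>
#include <string>
#include <vector>

// toy stand-in for a labeled peptide feature
struct ToyFeature
{
  std::string unmodified_sequence; // key used for cross-channel matching
  bool label_modified;             // true if the label changed the sequence
};

int main()
{
  std::vector<ToyFeature> light  = {{"PEPTIDEA", false}, {"PEPTIDEB", true}};
  std::vector<ToyFeature> medium = {{"PEPTIDEB", true}, {"PEPTIDEC", true}};

  // index the light channel by unmodified sequence
  std::map<std::string, ToyFeature> light_index;
  for (const auto& f : light) light_index[f.unmodified_sequence] = f;

  // match medium features against the index, consuming partners as they are found
  for (const auto& m : medium)
  {
    auto it = light_index.find(m.unmodified_sequence);
    if (it != light_index.end())
    {
      std::cout << "pair:             " << m.unmodified_sequence << '\n';
      light_index.erase(it); // consumed, so it is not reported again below
    }
    else
    {
      std::cout << "medium singleton: " << m.unmodified_sequence << '\n';
    }
  }

  // whatever is left in the index had no partner in the medium channel
  for (const auto& kv : light_index)
  {
    std::cout << "light singleton:  " << kv.first << '\n';
  }
  return 0;
}

Erasing matched entries is what lets the final loop treat anything still in the index as a singleton, mirroring the "clean up" loops in the labeler code.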