Ejemplo n.º 1
0
  boost::shared_ptr<IsobaricQuantitationMethod> IBSpectraFile::guessExperimentType_(const ConsensusMap& cm)
  {
    if (cm.getExperimentType() != "labeled_MS2" && cm.getExperimentType() != "itraq")
    {
      throw Exception::InvalidParameter(__FILE__,
                                        __LINE__,
                                        __PRETTY_FUNCTION__,
                                        "Given ConsensusMap does not hold any isobaric quantification data.");
    }

    // we take the mapcount as approximation
    if (cm.getFileDescriptions().size() == 4)
    {
      return boost::shared_ptr<IsobaricQuantitationMethod>(new ItraqFourPlexQuantitationMethod);
    }
    else if (cm.getFileDescriptions().size() == 6)
    {
      return boost::shared_ptr<IsobaricQuantitationMethod>(new TMTSixPlexQuantitationMethod);
    }
    else if (cm.getFileDescriptions().size() == 8)
    {
      return boost::shared_ptr<IsobaricQuantitationMethod>(new ItraqEightPlexQuantitationMethod);
    }
    else
    {
      throw Exception::InvalidParameter(__FILE__,
                                        __LINE__,
                                        __PRETTY_FUNCTION__,
                                        "Could not guess isobaric quantification data from ConsensusMap due to non-matching number of input maps.");
    }
  }
 void SeedListGenerator::generateSeedLists(const ConsensusMap& consensus,
                                           Map<UInt64, SeedList>& seed_lists)
 {
   seed_lists.clear();
   // iterate over all consensus features...
   for (ConsensusMap::ConstIterator cons_it = consensus.begin();
        cons_it != consensus.end(); ++cons_it)
   {
     DPosition<2> point(cons_it->getRT(), cons_it->getMZ());
     // for each sub-map in the consensus map, add a seed at the position of
     // this consensus feature:
     for (ConsensusMap::FileDescriptions::const_iterator file_it =
            consensus.getFileDescriptions().begin(); file_it !=
          consensus.getFileDescriptions().end(); ++file_it)
       seed_lists[file_it->first].push_back(point);
     // for each feature contained in the consensus feature, remove the seed of
     // the corresponding map:
     for (ConsensusFeature::HandleSetType::const_iterator feat_it =
            cons_it->getFeatures().begin(); feat_it !=
          cons_it->getFeatures().end(); ++feat_it)
     {
       seed_lists[feat_it->getMapIndex()].pop_back();
     }
     // this leaves seeds for maps where no feature was found near the
     // consensus position
   }
 }
  void FeatureGroupingAlgorithm::transferSubelements(const vector<ConsensusMap>& maps, ConsensusMap& out) const
  {
    // accumulate file descriptions from the input maps:
    // cout << "Updating file descriptions..." << endl;
    out.getFileDescriptions().clear();
    // mapping: (map index, original id) -> new id
    map<pair<Size, UInt64>, Size> mapid_table;
    for (Size i = 0; i < maps.size(); ++i)
    {
      const ConsensusMap& consensus = maps[i];
      for (ConsensusMap::FileDescriptions::const_iterator desc_it = consensus.getFileDescriptions().begin(); desc_it != consensus.getFileDescriptions().end(); ++desc_it)
      {
        Size counter = mapid_table.size();
        mapid_table[make_pair(i, desc_it->first)] = counter;
        out.getFileDescriptions()[counter] = desc_it->second;
      }
    }

    // look-up table: input map -> unique ID -> consensus feature
    // cout << "Creating look-up table..." << endl;
    vector<map<UInt64, ConsensusMap::ConstIterator> > feat_lookup(maps.size());
    for (Size i = 0; i < maps.size(); ++i)
    {
      const ConsensusMap& consensus = maps[i];
      for (ConsensusMap::ConstIterator feat_it = consensus.begin();
           feat_it != consensus.end(); ++feat_it)
      {
        // do NOT use "id_lookup[i][feat_it->getUniqueId()] = feat_it;" here as
        // you will get "attempt to copy-construct an iterator from a singular
        // iterator" in STL debug mode:
        feat_lookup[i].insert(make_pair(feat_it->getUniqueId(), feat_it));
      }
    }
    // adjust the consensus features:
    // cout << "Adjusting consensus features..." << endl;
    for (ConsensusMap::iterator cons_it = out.begin(); cons_it != out.end(); ++cons_it)
    {
      ConsensusFeature adjusted = ConsensusFeature(
        static_cast<BaseFeature>(*cons_it)); // remove sub-features
      for (ConsensusFeature::HandleSetType::const_iterator sub_it = cons_it->getFeatures().begin(); sub_it != cons_it->getFeatures().end(); ++sub_it)
      {
        UInt64 id = sub_it->getUniqueId();
        Size map_index = sub_it->getMapIndex();
        ConsensusMap::ConstIterator origin = feat_lookup[map_index][id];
        for (ConsensusFeature::HandleSetType::const_iterator handle_it = origin->getFeatures().begin(); handle_it != origin->getFeatures().end(); ++handle_it)
        {
          FeatureHandle handle = *handle_it;
          Size new_id = mapid_table[make_pair(map_index, handle.getMapIndex())];
          handle.setMapIndex(new_id);
          adjusted.insert(handle);
        }
      }
      *cons_it = adjusted;
    }
  }
  void FeatureGroupingAlgorithmLabeled::group(const std::vector<FeatureMap<> > & maps, ConsensusMap & out)
  {
    //check that the number of maps is ok
    if (maps.size() != 1)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Exactly one map must be given!");
    if (out.getFileDescriptions().size() != 2)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Two file descriptions must be set in 'out'!");

    //initialize LabeledPairFinder
    LabeledPairFinder pm;
    pm.setParameters(param_.copy("", true));

    //convert to consensus map
    std::vector<ConsensusMap> input(1);
    ConsensusMap::convert(0, maps[0], input[0]);

    //run
    pm.run(input, out);
  }
  void IsobaricChannelExtractor::registerChannelsInOutputMap_(ConsensusMap& consensus_map)
  {
    // register the individual channels in the output consensus map
    Int index = 0;
    for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin();
         cl_it != quant_method_->getChannelInformation().end();
         ++cl_it)
    {
      ConsensusMap::FileDescription channel_as_map;
      // label is the channel + description provided in the Params
      channel_as_map.label = quant_method_->getName() + "_" + cl_it->name;

      // TODO(aiche): number of features need to be set later
      channel_as_map.size = consensus_map.size();

      // add some more MetaInfo
      channel_as_map.setMetaValue("channel_name", cl_it->name);
      channel_as_map.setMetaValue("channel_id", cl_it->id);
      channel_as_map.setMetaValue("channel_description", cl_it->description);
      channel_as_map.setMetaValue("channel_center", cl_it->center);
      consensus_map.getFileDescriptions()[index++] = channel_as_map;
    }
  }
  void IsobaricQuantifier::computeLabelingStatistics_(ConsensusMap& consensus_map_out)
  {
    // number of total quantified spectra
    stats_.number_ms2_total = consensus_map_out.size();

    // Labeling efficiency statistics
    for (size_t i = 0; i < consensus_map_out.size(); ++i)
    {
      // is whole scan empty?!
      if (consensus_map_out[i].getIntensity() == 0) ++stats_.number_ms2_empty;

      // look at single reporters
      for (ConsensusFeature::HandleSetType::const_iterator it_elements = consensus_map_out[i].begin();
           it_elements != consensus_map_out[i].end();
           ++it_elements)
      {
        if (it_elements->getIntensity() == 0)
        {
          String ch_index = consensus_map_out.getFileDescriptions()[it_elements->getMapIndex()].getMetaValue("channel_name");
          ++stats_.empty_channels[ch_index];
        }
      }
    }
    LOG_INFO << "IsobaricQuantifier: skipped " << stats_.number_ms2_empty << " of " << consensus_map_out.size() << " selected scans due to lack of reporter information:\n";
    consensus_map_out.setMetaValue("isoquant:scans_noquant", stats_.number_ms2_empty);
    consensus_map_out.setMetaValue("isoquant:scans_total", consensus_map_out.size());

    LOG_INFO << "IsobaricQuantifier: channels with signal\n";
    for (std::map<String, Size>::const_iterator it_m = stats_.empty_channels.begin();
         it_m != stats_.empty_channels.end();
         ++it_m)
    {
      LOG_INFO << "      channel " << it_m->first << ": " << (consensus_map_out.size() - it_m->second) << " / " <<  consensus_map_out.size() << " (" << ((consensus_map_out.size() - it_m->second) * 100 / consensus_map_out.size()) << "%)\n";
      consensus_map_out.setMetaValue(String("isoquant:quantifyable_ch") + it_m->first, (consensus_map_out.size() - it_m->second));
    }

  }
Ejemplo n.º 7
0
  ExitCodes main_(int, const char **)
  {
    // data to be passed through the algorithm
    vector<vector<SILACPattern> > data;
    MSQuantifications msq;
    vector<Clustering *> cluster_data;

    // 
    // Parameter handling
    // 
    map<String, DoubleReal> label_identifiers;   // list defining the mass shifts of each label (e.g. "Arg6" => 6.0201290268)
    handleParameters_sample();
    handleParameters_algorithm();
    handleParameters_labels(label_identifiers);
    handleParameters();

    if (selected_labels.empty() && !out.empty()) // incompatible parameters
    {
      writeLog_("Error: The 'out' parameter cannot be used without a label (parameter 'sample:labels'). Use 'out_features' instead.");
      return ILLEGAL_PARAMETERS;
    }

    // 
    // Initializing the SILACAnalzer with our parameters
    // 
    SILACAnalyzer analyzer;
    analyzer.setLogType(log_type_);
    analyzer.initialize(
      // section "sample"
      selected_labels,
      charge_min,
      charge_max,
      missed_cleavages,
      isotopes_per_peptide_min,
      isotopes_per_peptide_max,
      // section "algorithm"
      rt_threshold,
      rt_min,
      intensity_cutoff,
      intensity_correlation,
      model_deviation,
      allow_missing_peaks,
      // labels
      label_identifiers);


    //--------------------------------------------------
    // loading input from .mzML
    //--------------------------------------------------

    MzMLFile file;
    MSExperiment<Peak1D> exp;

    // only read MS1 spectra ...
    /*
    std::vector<int> levels;
    levels.push_back(1);
    file.getOptions().setMSLevels(levels);
    */
    LOG_DEBUG << "Loading input..." << endl;
    file.setLogType(log_type_);
    file.load(in, exp);

    // set size of input map
    exp.updateRanges();

    // extract level 1 spectra
    exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<MSExperiment<Peak1D>::SpectrumType>(IntList::create("1"), true)), exp.end());

    // sort according to RT and MZ
    exp.sortSpectra();

    if (out_mzq != "")
    {
      vector<vector<String> > SILAClabels = analyzer.getSILAClabels(); // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"

      std::vector<std::vector<std::pair<String, DoubleReal> > > labels;
      //add none label
      labels.push_back(std::vector<std::pair<String, DoubleReal> >(1, std::make_pair<String, DoubleReal>(String("none"), DoubleReal(0))));
      for (Size i = 0; i < SILAClabels.size(); ++i)       //SILACLabels MUST be in weight order!!!
      {
        std::vector<std::pair<String, DoubleReal> > one_label;
        for (UInt j = 0; j < SILAClabels[i].size(); ++j)
        {
          one_label.push_back(*(label_identifiers.find(SILAClabels[i][j])));              // this dereferencing would break if all SILAClabels would not have been checked before!
        }
        labels.push_back(one_label);
      }
      msq.registerExperiment(exp, labels);       //add assays
      msq.assignUIDs();
    }
    MSQuantifications::QUANT_TYPES quant_type = MSQuantifications::MS1LABEL;
    msq.setAnalysisSummaryQuantType(quant_type);    //add analysis_summary_

    //--------------------------------------------------
    // estimate peak width
    //--------------------------------------------------

    LOG_DEBUG << "Estimating peak width..." << endl;
    PeakWidthEstimator::Result peak_width;
    try
    {
      peak_width = analyzer.estimatePeakWidth(exp);
    }
    catch (Exception::InvalidSize &)
    {
      writeLog_("Error: Unable to estimate peak width of input data.");
      return INCOMPATIBLE_INPUT_DATA;
    }


    if (in_filters == "")
    {
      //--------------------------------------------------
      // filter input data
      //--------------------------------------------------

      LOG_DEBUG << "Filtering input data..." << endl;
      analyzer.filterData(exp, peak_width, data); 

      //--------------------------------------------------
      // store filter results
      //--------------------------------------------------

      if (out_filters != "")
      {
        LOG_DEBUG << "Storing filtering results..." << endl;
        ConsensusMap map;
        for (std::vector<std::vector<SILACPattern> >::const_iterator it = data.begin(); it != data.end(); ++it)
        {
          analyzer.generateFilterConsensusByPattern(map, *it);
        }
        analyzer.writeConsensus(out_filters, map);
      }
    }
    else
    {
      //--------------------------------------------------
      // load filter results
      //--------------------------------------------------

      LOG_DEBUG << "Loading filtering results..." << endl;
      ConsensusMap map;
      analyzer.readConsensus(in_filters, map);
      analyzer.readFilterConsensusByPattern(map, data);
    }

    //--------------------------------------------------
    // clustering
    //--------------------------------------------------

    LOG_DEBUG << "Clustering data..." << endl;
    analyzer.clusterData(exp, peak_width, cluster_data, data);

    //--------------------------------------------------------------
    // write output
    //--------------------------------------------------------------

    if (out_debug != "")
    {
      LOG_DEBUG << "Writing debug output file..." << endl;
      std::ofstream out((out_debug + ".clusters.csv").c_str());

      vector<vector<DoubleReal> > massShifts = analyzer.getMassShifts(); // list of mass shifts

      // generate header
      out
      << std::fixed << std::setprecision(8)
      << "ID,RT,MZ_PEAK,CHARGE";
      for (UInt i = 1; i <= massShifts[0].size(); ++i)
      {
        out << ",DELTA_MASS_" << i + 1;
      }
      for (UInt i = 0; i <= massShifts[0].size(); ++i)
      {
        for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
        {
          out << ",INT_PEAK_" << i + 1 << '_' << j;
        }
      }
      out << ",MZ_RAW";
      for (UInt i = 0; i <= massShifts[0].size(); ++i)
      {
        for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
        {
          out << ",INT_RAW_" << i + 1 << '_' << j;
        }
      }
      for (UInt i = 0; i <= massShifts[0].size(); ++i)
      {
        for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
        {
          out << ",MZ_RAW_" << i + 1 << '_' << j;
        }
      }
      out << '\n';

      // write data
      UInt cluster_id = 0;
      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        analyzer.generateClusterDebug(out, **it, cluster_id);
      }
    }

    if (out != "")
    {
      LOG_DEBUG << "Generating output consensus map..." << endl;
      ConsensusMap map;

      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        analyzer.generateClusterConsensusByCluster(map, **it);
      }

      LOG_DEBUG << "Adding meta data..." << endl;
      // XXX: Need a map per mass shift
      ConsensusMap::FileDescriptions& desc = map.getFileDescriptions();
      Size id = 0;
      for (ConsensusMap::FileDescriptions::iterator it = desc.begin(); it != desc.end(); ++it)
      {
        if (test_mode_) it->second.filename = in; // skip path, since its not cross platform and complicates verification
        else it->second.filename = File::basename(in);
        // Write correct label
        // (this would crash if used without a label!)
        if (id > 0) it->second.label = StringList(analyzer.getSILAClabels()[id - 1]).concatenate(""); // skip first round (empty label is not listed)
        ++id;
      }

      std::set<DataProcessing::ProcessingAction> actions;
      actions.insert(DataProcessing::DATA_PROCESSING);
      actions.insert(DataProcessing::PEAK_PICKING);
      actions.insert(DataProcessing::FILTERING);
      actions.insert(DataProcessing::QUANTITATION);

      addDataProcessing_(map, getProcessingInfo_(actions));

      analyzer.writeConsensus(out, map);
      if (out_mzq != "")
      {
        LOG_DEBUG << "Generating output mzQuantML file..." << endl;
        ConsensusMap numap(map);
        //calc. ratios
        for (ConsensusMap::iterator cit = numap.begin(); cit != numap.end(); ++cit)
        {
          //~ make ratio templates
          std::vector<ConsensusFeature::Ratio> rts;
          for (std::vector<MSQuantifications::Assay>::const_iterator ait = msq.getAssays().begin() + 1; ait != msq.getAssays().end(); ++ait)
          {
            ConsensusFeature::Ratio r;
            r.numerator_ref_ = String(msq.getAssays().begin()->uid_);
            r.denominator_ref_ = String(ait->uid_);
            r.description_.push_back("Simple ratio calc");
            r.description_.push_back("light to medium/.../heavy");
            //~ "<cvParam cvRef=\"PSI-MS\" accession=\"MS:1001132\" name=\"peptide ratio\"/>"
            rts.push_back(r);
          }

          const ConsensusFeature::HandleSetType& feature_handles = cit->getFeatures();
          if (feature_handles.size() > 1)
          {
            std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fit = feature_handles.begin();             // this is unlabeled
            fit++;
            for (; fit != feature_handles.end(); ++fit)
            {
              Size ri = std::distance(feature_handles.begin(), fit);
              rts[ri - 1].ratio_value_ =  feature_handles.begin()->getIntensity() / fit->getIntensity();             // a proper silacalanyzer algo should never have 0-intensities so no 0devison ...
            }
          }

          cit->setRatios(rts);
        }
        msq.addConsensusMap(numap);        //add SILACAnalyzer result

        //~ msq.addFeatureMap();//add SILACAnalyzer evidencetrail as soon as clear what is realy contained in the featuremap
        //~ add AuditCollection - no such concept in TOPPTools yet
        analyzer.writeMzQuantML(out_mzq, msq);
      }
    }

    if (out_clusters != "")
    {
      LOG_DEBUG << "Generating cluster output file..." << endl;
      ConsensusMap map;
      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        UInt cluster_id = 0;
        analyzer.generateClusterConsensusByPattern(map, **it, cluster_id);
      }

      ConsensusMap::FileDescription & desc = map.getFileDescriptions()[0];
      desc.filename = in;
      desc.label = "Cluster";

      analyzer.writeConsensus(out_clusters, map);
    }

    if (out_features != "")
    {
      LOG_DEBUG << "Generating output feature map..." << endl;
      FeatureMap<> map;
      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        analyzer.generateClusterFeatureByCluster(map, **it);
      }

      analyzer.writeFeatures(out_features, map);
    }

    return EXECUTION_OK;
  }
  vector<double> ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(const ConsensusMap& map, const double& ratio_threshold, const String& acc_filter, const String& desc_filter)
  {
    Size number_of_features = map.size();
    Size number_of_maps = map.getFileDescriptions().size();
    vector<vector<double> > feature_int(number_of_maps);

    //get map with most features, resize feature_int
    UInt map_with_most_features_idx = 0;
    ConsensusMap::FileDescriptions::const_iterator map_with_most_features = map.getFileDescriptions().find(0);
    for (UInt i = 0; i < number_of_maps; i++)
    {
      feature_int[i].resize(number_of_features);
      ConsensusMap::FileDescriptions::const_iterator it = map.getFileDescriptions().find(i);
      if (it->second.size > map_with_most_features->second.size)
      {
        map_with_most_features = it;
        map_with_most_features_idx = i;
      }
    }

    //fill feature_int with intensities
    Size pass_counter = 0;
    ConsensusMap::ConstIterator cf_it;
    UInt idx = 0;
    for (cf_it = map.begin(); cf_it != map.end(); ++cf_it, ++idx)
    {
      if (!ConsensusMapNormalizerAlgorithmMedian::passesFilters_(cf_it, map, acc_filter, desc_filter))
      {
        continue;
      }
      ++pass_counter;

      ConsensusFeature::HandleSetType::const_iterator f_it;
      for (f_it = cf_it->getFeatures().begin(); f_it != cf_it->getFeatures().end(); ++f_it)
      {
        feature_int[f_it->getMapIndex()][idx] = f_it->getIntensity();
      }
    }

    LOG_INFO << endl << "Using " << pass_counter << "/" << map.size() <<  " consensus features for computing normalization coefficients" << endl << endl;

    //determine ratio
    vector<double> ratio_vector(number_of_maps);
    for (UInt j = 0; j < number_of_maps; j++)
    {
      vector<double> ratios;
      for (UInt k = 0; k < number_of_features; ++k)
      {
        if (feature_int[map_with_most_features_idx][k] != 0.0 && feature_int[j][k] != 0.0)
        {
          double ratio = feature_int[map_with_most_features_idx][k] / feature_int[j][k];
          if (ratio > ratio_threshold && ratio < 1 / ratio_threshold)
          {
            ratios.push_back(ratio);
          }
        }
      }
      if (ratios.empty())
      {
        LOG_WARN << endl << "Not enough features passing filters. Cannot compute normalization coefficients for all maps. Result will be unnormalized." << endl << endl;
        return vector<double>(number_of_maps, 1.0);
      }
      ratio_vector[j] = Math::mean(ratios.begin(), ratios.end());
    }
    return ratio_vector;
  }
  ExitCodes main_(int, const char **)
  {
    FeatureGroupingAlgorithmUnlabeled * algorithm = new FeatureGroupingAlgorithmUnlabeled();

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    StringList ins;
    ins = getStringList_("in");
    String out = getStringOption_("out");

    //-------------------------------------------------------------
    // check for valid input
    //-------------------------------------------------------------
    // check if all input files have the correct type
    FileTypes::Type file_type = FileHandler::getType(ins[0]);
    for (Size i = 0; i < ins.size(); ++i)
    {
      if (FileHandler::getType(ins[i]) != file_type)
      {
        writeLog_("Error: All input files must be of the same type!");
        return ILLEGAL_PARAMETERS;
      }
    }

    //-------------------------------------------------------------
    // set up algorithm
    //-------------------------------------------------------------
    Param algorithm_param = getParam_().copy("algorithm:", true);
    writeDebug_("Used algorithm parameters", algorithm_param, 3);
    algorithm->setParameters(algorithm_param);

    Size reference_index(0);
    //-------------------------------------------------------------
    // perform grouping
    //-------------------------------------------------------------
    // load input
    ConsensusMap out_map;
    StringList ms_run_locations;
    if (file_type == FileTypes::FEATUREXML)
    {
      // use map with highest number of features as reference:
      Size max_count(0);
      FeatureXMLFile f;
      for (Size i = 0; i < ins.size(); ++i)
      {
        Size s = f.loadSize(ins[i]);
        if (s > max_count)
        {
          max_count = s;
          reference_index = i;
        }
      }

      // Load reference map and input it to the algorithm
      UInt64 ref_id;
      Size ref_size;
      std::vector<PeptideIdentification> ref_pepids;
      std::vector<ProteinIdentification> ref_protids;
      {
        FeatureMap map_ref;
        FeatureXMLFile f_fxml_tmp;
        f_fxml_tmp.getOptions().setLoadConvexHull(false);
        f_fxml_tmp.getOptions().setLoadSubordinates(false);
        f_fxml_tmp.load(ins[reference_index], map_ref);
        algorithm->setReference(reference_index, map_ref);
        ref_id = map_ref.getUniqueId();
        ref_size = map_ref.size();
        ref_pepids = map_ref.getUnassignedPeptideIdentifications();
        ref_protids = map_ref.getProteinIdentifications();
      }

      ConsensusMap dummy;
      // go through all input files and add them to the result one by one
      for (Size i = 0; i < ins.size(); ++i)
      {

        FeatureXMLFile f_fxml_tmp;
        FeatureMap tmp_map;
        f_fxml_tmp.getOptions().setLoadConvexHull(false);
        f_fxml_tmp.getOptions().setLoadSubordinates(false);
        f_fxml_tmp.load(ins[i], tmp_map);

        // copy over information on the primary MS run
        StringList ms_runs;
        tmp_map.getPrimaryMSRunPath(ms_runs);
        ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

        if (i != reference_index)
        {
          algorithm->addToGroup(i, tmp_map);

          // store some meta-data about the maps in the "dummy" object -> try to
          // keep the same order as they were given in the input independent of
          // which map is the reference.

          dummy.getFileDescriptions()[i].filename = ins[i];
          dummy.getFileDescriptions()[i].size = tmp_map.size();
          dummy.getFileDescriptions()[i].unique_id = tmp_map.getUniqueId();

          // add protein identifications to result map
          dummy.getProteinIdentifications().insert(
            dummy.getProteinIdentifications().end(),
            tmp_map.getProteinIdentifications().begin(),
            tmp_map.getProteinIdentifications().end());

          // add unassigned peptide identifications to result map
          dummy.getUnassignedPeptideIdentifications().insert(
            dummy.getUnassignedPeptideIdentifications().end(),
            tmp_map.getUnassignedPeptideIdentifications().begin(),
            tmp_map.getUnassignedPeptideIdentifications().end());
        }
        else
        {
          // copy the meta-data from the refernce map
          dummy.getFileDescriptions()[i].filename = ins[i];
          dummy.getFileDescriptions()[i].size = ref_size;
          dummy.getFileDescriptions()[i].unique_id = ref_id;

          // add protein identifications to result map
          dummy.getProteinIdentifications().insert(
            dummy.getProteinIdentifications().end(),
            ref_protids.begin(),
            ref_protids.end());

          // add unassigned peptide identifications to result map
          dummy.getUnassignedPeptideIdentifications().insert(
            dummy.getUnassignedPeptideIdentifications().end(),
            ref_pepids.begin(),
            ref_pepids.end());
        }
      }

      // get the resulting map
      out_map = algorithm->getResultMap();

      //
      // Copy back meta-data (Protein / Peptide ids / File descriptions)
      //

      // add protein identifications to result map
      out_map.getProteinIdentifications().insert(
        out_map.getProteinIdentifications().end(),
        dummy.getProteinIdentifications().begin(),
        dummy.getProteinIdentifications().end());

      // add unassigned peptide identifications to result map
      out_map.getUnassignedPeptideIdentifications().insert(
        out_map.getUnassignedPeptideIdentifications().end(),
        dummy.getUnassignedPeptideIdentifications().begin(),
        dummy.getUnassignedPeptideIdentifications().end());

      out_map.setFileDescriptions(dummy.getFileDescriptions());

      // canonical ordering for checking the results, and the ids have no real meaning anyway
      // the way this was done in DelaunayPairFinder and StablePairFinder
      // -> the same ordering as FeatureGroupingAlgorithmUnlabeled::group applies!
      out_map.sortByMZ();
      out_map.updateRanges();
    }
    else
    {
      vector<ConsensusMap> maps(ins.size());
      ConsensusXMLFile f;
      for (Size i = 0; i < ins.size(); ++i)
      {
        f.load(ins[i], maps[i]);
        StringList ms_runs;
        maps[i].getPrimaryMSRunPath(ms_runs);
        ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
      }
      // group
      algorithm->FeatureGroupingAlgorithm::group(maps, out_map);

      // set file descriptions:
      bool keep_subelements = getFlag_("keep_subelements");
      if (!keep_subelements)
      {
        for (Size i = 0; i < ins.size(); ++i)
        {
          out_map.getFileDescriptions()[i].filename = ins[i];
          out_map.getFileDescriptions()[i].size = maps[i].size();
          out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
        }
      }
      else
      {
        // components of the output map are not the input maps themselves, but
        // the components of the input maps:
        algorithm->transferSubelements(maps, out_map);
      }
    }

    // assign unique ids
    out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

    // annotate output with data processing info
    addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

    out_map.setPrimaryMSRunPath(ms_run_locations);
    // write output
    ConsensusXMLFile().store(out, out_map);

    // some statistics
    map<Size, UInt> num_consfeat_of_size;
    for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit)
    {
      ++num_consfeat_of_size[cmit->size()];
    }

    LOG_INFO << "Number of consensus features:" << endl;
    for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
    {
      LOG_INFO << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
    }
    LOG_INFO << "  total:      " << setw(6) << out_map.size() << endl;

    delete algorithm;

    return EXECUTION_OK;
  }
  /// @brief extracts the iTRAQ channels from the MS data and stores intensity values in a consensus map
  ///
  /// @param ms_exp_data Raw data to read
  /// @param consensus_map Output each MS² scan as a consensus feature
  /// @throws Exception::MissingInformation if no scans present or MS² scan has no precursor
  void ItraqChannelExtractor::run(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map)
  {
    if (ms_exp_data.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.";
      throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!");
    }

    MSExperiment<> ms_exp_MS2;

    String mode = (String) param_.getValue("select_activation");
    std::cout << "Selecting scans with activation mode: " << (mode == "" ? "any" : mode) << "\n";
    HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(ListUtils::create<String>(mode));

    for (size_t idx = 0; idx < ms_exp_data.size(); ++idx)
    {
      if (ms_exp_data[idx].getMSLevel() == 2)
      {
        if (mode == "" || activation_predicate(ms_exp_data[idx]))
        {
          // copy only MS² scans
          ms_exp_MS2.addSpectrum(ms_exp_data[idx]);
        }
        else
        {
          //std::cout << "deleting spectrum # " << idx << " with RT: " << ms_exp_data[idx].getRT() << "\n";
        }
      }
    }

#ifdef ITRAQ_DEBUG
    std::cout << "we have " << ms_exp_MS2.size() << " scans left of level " << ms_exp_MS2[0].getMSLevel() << std::endl;
    std::cout << "run: channel_map_ has " << channel_map_.size() << " entries!" << std::endl;
#endif
    consensus_map.clear(false);
    // set <mapList> header
    Int index_cnt = 0;
    for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it)
    {
      // structure of Map cm_it
      //  first == channel-name as Int e.g. 114
      //  second == ChannelInfo struct
      ConsensusMap::FileDescription channel_as_map;
      // label is the channel + description provided in the Params
      if (itraq_type_ != TMT_SIXPLEX)
        channel_as_map.label = "iTRAQ_" + String(cm_it->second.name) + "_" + String(cm_it->second.description);
      else
        channel_as_map.label = "TMT_" + String(cm_it->second.name) + "_" + String(cm_it->second.description);

      channel_as_map.size = ms_exp_MS2.size();
      //TODO what about .filename? leave empty?
      // add some more MetaInfo
      channel_as_map.setMetaValue("channel_name", cm_it->second.name);
      channel_as_map.setMetaValue("channel_id", cm_it->second.id);
      channel_as_map.setMetaValue("channel_description", cm_it->second.description);
      channel_as_map.setMetaValue("channel_center", cm_it->second.center);
      channel_as_map.setMetaValue("channel_active", String(cm_it->second.active ? "true" : "false"));
      consensus_map.getFileDescriptions()[index_cnt++] = channel_as_map;
    }

    // create consensusElements

    Peak2D::CoordinateType allowed_deviation = (Peak2D::CoordinateType) param_.getValue("reporter_mass_shift");
    // now we have picked data
    // --> assign peaks to channels
    UInt element_index(0);

    for (MSExperiment<>::ConstIterator it = ms_exp_MS2.begin(); it != ms_exp_MS2.end(); ++it)
    {
      // store RT&MZ of parent ion as centroid of ConsensusFeature
      ConsensusFeature cf;
      cf.setUniqueId();
      cf.setRT(it->getRT());
      if (it->getPrecursors().size() >= 1)
      {
        cf.setMZ(it->getPrecursors()[0].getMZ());
      }
      else
      {
        throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + String(it->getNativeID()) + " with RT " + String(it->getRT()));
      }

      Peak2D channel_value;
      channel_value.setRT(it->getRT());
      // for each each channel
      Int index = 0;
      Peak2D::IntensityType overall_intensity = 0;
      for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it)
      {
        // set mz-position of channel
        channel_value.setMZ(cm_it->second.center);
        // reset intensity
        channel_value.setIntensity(0);

        //add up all signals
        for (MSExperiment<>::SpectrumType::ConstIterator mz_it =
               it->MZBegin(cm_it->second.center - allowed_deviation)
             ; mz_it != it->MZEnd(cm_it->second.center + allowed_deviation)
             ; ++mz_it
             )
        {
          channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity());
        }

        overall_intensity += channel_value.getIntensity();

        // add channel to ConsensusFeature
        cf.insert(index++, channel_value, element_index);

      } // ! channel_iterator


      // check featureHandles are not empty
      if (overall_intensity == 0)
      {
        cf.setMetaValue("all_empty", String("true"));
      }
      cf.setIntensity(overall_intensity);
      consensus_map.push_back(cf);

      // the tandem-scan in the order they appear in the experiment
      ++element_index;
    } // ! Experiment iterator


#ifdef ITRAQ_DEBUG
    std::cout << "processed " << element_index << " scans" << std::endl;
#endif

    consensus_map.setExperimentType("itraq");

    return;
  }
Ejemplo n.º 11
0
  ExitCodes outputTo(ostream& os)
  {
    //-------------------------------------------------------------
    // Parameter handling
    //-------------------------------------------------------------

    // File names
    String in = getStringOption_("in");

    // File type
    FileHandler fh;
    FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

    if (in_type == FileTypes::UNKNOWN)
    {
      in_type = fh.getType(in);
      writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
    }

    if (in_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine input file type!");
      return PARSE_ERROR;
    }

    MSExperiment<Peak1D> exp;
    FeatureMap feat;
    ConsensusMap cons;

    if (in_type == FileTypes::FEATUREXML) //features
    {
      FeatureXMLFile().load(in, feat);
      feat.updateRanges();
    }
    else if (in_type == FileTypes::CONSENSUSXML)     //consensus features
    {
      ConsensusXMLFile().load(in, cons);
      cons.updateRanges();
    }

    //-------------------------------------------------------------
    // meta information
    //-------------------------------------------------------------
    if (getFlag_("m"))
    {
      os << endl
         << "-- General information --" << endl
         << endl
         << "file name: " << in << endl
         << "file type: " <<  FileTypes::typeToName(in_type) << endl;

      //basic info
      os << endl
         << "-- Meta information --" << endl
         << endl;

      if (in_type == FileTypes::FEATUREXML) //features
      {
        os << "Document id       : " << feat.getIdentifier() << endl << endl;
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        os << "Document id       : " << cons.getIdentifier() << endl << endl;
      }
    }

    //-------------------------------------------------------------
    // data processing
    //-------------------------------------------------------------
    if (getFlag_("p"))
    {
      //basic info
      os << endl
         << "-- Data processing information --" << endl
         << endl;

      //get data processing info
      vector<DataProcessing> dp;
      if (in_type == FileTypes::FEATUREXML) //features
      {
        dp = feat.getDataProcessing();
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        dp = cons.getDataProcessing();
      }
      int i = 0;
      for (vector<DataProcessing>::iterator it = dp.begin(); it != dp.end(); ++it)
      {
        os << "Data processing " << i << endl;
        os << "\tcompletion_time:   " << (*it).getCompletionTime().getDate() << 'T' << (*it).getCompletionTime().getTime() << endl;
        os << "\tsoftware name:     " << (*it).getSoftware().getName() << " version " << (*it).getSoftware().getVersion() << endl;
        for (set<DataProcessing::ProcessingAction>::const_iterator paIt = (*it).getProcessingActions().begin(); paIt != (*it).getProcessingActions().end(); ++paIt)
        {
          os << "\t\tprocessing action: " << DataProcessing::NamesOfProcessingAction[*paIt] << endl;
        }
      }
      ++i;
    }

    //-------------------------------------------------------------
    // statistics
    //-------------------------------------------------------------
    if (getFlag_("s"))
    {
      //-------------------------------------------------------------
      // Content statistics
      //-------------------------------------------------------------
      Map<String, int> meta_names;
      if (in_type == FileTypes::FEATUREXML) //features
      {
        os << "Number of features: " << feat.size() << endl
           << endl
           << "Ranges:" << endl
           << "  retention time:  " << String::number(feat.getMin()[Peak2D::RT], 2) << " : " << String::number(feat.getMax()[Peak2D::RT], 2) << endl
           << "  mass-to-charge:  " << String::number(feat.getMin()[Peak2D::MZ], 2) << " : " << String::number(feat.getMax()[Peak2D::MZ], 2) << endl
           << "  intensity:       " << String::number(feat.getMinInt(), 2) << " : " << String::number(feat.getMaxInt(), 2) << endl
           << endl;

        // Charge distribution
        Map<UInt, UInt> charges;
        for (Size i = 0; i < feat.size(); ++i)
        {
          charges[feat[i].getCharge()]++;
        }

        os << "Charge distribution" << endl;
        for (Map<UInt, UInt>::const_iterator it = charges.begin();
             it != charges.end(); ++it)
        {
          os << "charge " << it->first << ": " << it->second << endl;
        }
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        map<Size, UInt> num_consfeat_of_size;
        for (ConsensusMap::const_iterator cmit = cons.begin();
             cmit != cons.end(); ++cmit)
        {
          ++num_consfeat_of_size[cmit->size()];
        }

        os << endl << "Number of consensus features:" << endl;
        for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
        {
          os << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
        }
        os << "  total:      " << setw(6) << cons.size() << endl << endl;

        os << "Ranges:" << endl
           << "  retention time:  " << String::number(cons.getMin()[Peak2D::RT], 2) << " : " << String::number(cons.getMax()[Peak2D::RT], 2) << endl
           << "  mass-to-charge:  " << String::number(cons.getMin()[Peak2D::MZ], 2) << " : " << String::number(cons.getMax()[Peak2D::MZ], 2) << endl
           << "  intensity:       " << String::number(cons.getMinInt(), 2) << " : " << String::number(cons.getMaxInt(), 2) << endl;

        // file descriptions
        const ConsensusMap::FileDescriptions& descs = cons.getFileDescriptions();
        if (!descs.empty())
        {
          os << endl <<
          "File descriptions:" << endl;
          for (ConsensusMap::FileDescriptions::const_iterator it = descs.begin(); it != descs.end(); ++it)
          {
            os << " - " << it->second.filename << endl
               << "   identifier: " << it->first << endl
               << "   label     : " << it->second.label << endl
               << "   size      : " << it->second.size << endl;
          }
        }
      }

      os << endl
         << "-- Summary Statistics --" << endl
         << endl;

    }

    if (in_type == FileTypes::FEATUREXML) //features
    {
      feat.sortByRT();

      vector<double> slice_stats;
      Size n = getIntOption_("n");

      Size begin = 0;
      Size end = 0;
      os << "#slice\tRT_begin\tRT_end\tnumber_of_features\ttic\t"
         << "int_mean\tint_stddev\tint_min\tint_max\tint_median\tint_lowerq\tint_upperq\t"
         << "mz_mean\tmz_stddev\tmz_min\tmz_max\tmz_median\tmz_lowerq\tmz_upperq\t"
         << "width_mean\twidth_stddev\twidth_min\twidth_max\twidth_median\twidth_lowerq\twidth_upperq\t"
         << "qual_mean\tqual_stddev\tqual_min\tqual_max\tqual_median\tqual_lowerq\tqual_upperq\t"
         << "rt_qual_mean\trt_qual_stddev\trt_qual_min\trt_qual_max\trt_qual_median\trt_qual_lowerq\trt_qual_upperq\t"
         << "mz_qual_mean\tmz_qual_stddev\tmz_qual_min\tmz_qual_max\tmz_qual_median\tmz_qual_lowerq\tmz_qual_upperq"
         << endl;

      double rt_begin = 0.0;
      for (Size slice = 0; slice < n; ++slice)
      {
        // Determine slice boundaries.
        double rt_end = feat.back().getRT() / (double)n * (slice + 1);
        for (end = begin; end < feat.size() && feat[end].getRT() < rt_end; ++end) {}

        // Compute statistics on all features in this slice.
        slice_stats = sliceStatistics(feat, begin, end);

        // Write the beginning and end of the slices to the output as well as the slice index.
        os << slice << "\t" << rt_begin << "\t" << rt_end << "\t" << end - begin << "\t";

        // Write the statistics as a line of an csv file
        copy(slice_stats.begin(), slice_stats.end(), ostream_iterator<double>(os, "\t"));
        os << endl;

        begin = end;
        rt_begin = rt_end;
      }
    }
    else if (in_type == FileTypes::CONSENSUSXML)     //consensus features
    {
      Size size = cons.size();

      vector<double> intensities;
      intensities.reserve(size);
      vector<double> qualities(size);
      qualities.reserve(size);
      vector<double> widths(size);
      widths.reserve(size);

      vector<double> rt_delta_by_elems;
      vector<double> rt_aad_by_elems;
      vector<double> rt_aad_by_cfs;
      rt_aad_by_cfs.reserve(size);

      vector<double> mz_delta_by_elems;
      vector<double> mz_aad_by_elems;
      vector<double> mz_aad_by_cfs;
      mz_aad_by_cfs.reserve(size);

      vector<double> it_delta_by_elems;
      vector<double> it_aad_by_elems;
      vector<double> it_aad_by_cfs;
      it_aad_by_cfs.reserve(size);

      for (ConsensusMap::const_iterator cm_iter = cons.begin();
           cm_iter != cons.end(); ++cm_iter)
      {
        double rt_aad = 0;
        double mz_aad = 0;
        double it_aad = 0;
        intensities.push_back(cm_iter->getIntensity());
        qualities.push_back(cm_iter->getQuality());
        widths.push_back(cm_iter->getWidth());
        for (ConsensusFeature::HandleSetType::const_iterator hs_iter = cm_iter->begin();
             hs_iter != cm_iter->end(); ++hs_iter)
        {
          double rt_diff = hs_iter->getRT() - cm_iter->getRT();
          rt_delta_by_elems.push_back(rt_diff);
          if (rt_diff < 0)
          {
            rt_diff = -rt_diff;
          }
          rt_aad_by_elems.push_back(rt_diff);
          rt_aad += rt_diff;
          double mz_diff = hs_iter->getMZ() - cm_iter->getMZ();
          mz_delta_by_elems.push_back(mz_diff);
          if (mz_diff < 0)
          {
            mz_diff = -mz_diff;
          }
          mz_aad_by_elems.push_back(mz_diff);
          mz_aad += mz_diff;
          double it_ratio = hs_iter->getIntensity() / (cm_iter->getIntensity() ? cm_iter->getIntensity() : 1.);
          it_delta_by_elems.push_back(it_ratio);
          if (it_ratio < 1.)
          {
            it_ratio = 1. / it_ratio;
          }
          it_aad_by_elems.push_back(it_ratio);
          it_aad += it_ratio;
        }
        if (!cm_iter->empty())
        {
          rt_aad /= cm_iter->size();
          mz_aad /= cm_iter->size();
          it_aad /= cm_iter->size();
        } // otherwise rt_aad etc. are 0 anyway
        rt_aad_by_cfs.push_back(rt_aad);
        mz_aad_by_cfs.push_back(mz_aad);
        it_aad_by_cfs.push_back(it_aad);
      }

      OpenMS::SomeStatistics some_statistics;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensities of consensus features:" << endl << some_statistics(intensities) << endl;

      os.precision(writtenDigits(ConsensusFeature::QualityType()));
      os << "Qualities of consensus features:" << endl << some_statistics(qualities) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Retention time differences ( element-center, weight 1 per element):" << endl << some_statistics(rt_delta_by_elems) << endl;
      os << "Absolute retention time differences ( |element-center|, weight 1 per element):" << endl << some_statistics(rt_aad_by_elems) << endl;
      os << "Average absolute differences of retention time within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(rt_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Mass-to-charge differences ( element-center, weight 1 per element):" << endl << some_statistics(mz_delta_by_elems) << endl;
      os << "Absolute differences of mass-to-charge ( |element-center|, weight 1 per element):" << endl << some_statistics(mz_aad_by_elems) << endl;
      os << "Average absolute differences of mass-to-charge within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(mz_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensity ratios ( element/center, weight 1 per element):" << endl << some_statistics(it_delta_by_elems) << endl;
      os << "Relative intensity error ( max{(element/center),(center/element)}, weight 1 per element):" << endl << some_statistics(it_aad_by_elems) << endl;
      os << "Average relative intensity error within consensus features ( max{(element/center),(center/element)}, weight 1 per consensus features):" << endl << some_statistics(it_aad_by_cfs) << endl;
    }

    return EXECUTION_OK;
  }
Ejemplo n.º 12
0
    pItraq.setValue("channel_116_description", "else");
    q_method->setParameters(pItraq);

    IsobaricChannelExtractor ice(q_method);

    // disable activation filtering
    Param p = ice.getParameters();
    p.setValue("select_activation", "");
    ice.setParameters(p);

    // extract channels
    ConsensusMap cm_out;
    ice.extractChannels(exp, cm_out);

    // check channel meta information
    TEST_EQUAL(cm_out.getFileDescriptions().size(), 4)
    ABORT_IF(cm_out.getFileDescriptions().size() != 4)

    TEST_EQUAL(cm_out.getFileDescriptions()[0].label, "itraq4plex_114")
    TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_name"), 114)
    TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_id"), 0)
    TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_description"), "ref")
    TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_center"), 114.1112)

    TEST_EQUAL(cm_out.getFileDescriptions()[1].label, "itraq4plex_115")
    TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_name"), 115)
    TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_id"), 1)
    TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_description"), "something")
    TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_center"), 115.1082)

    TEST_EQUAL(cm_out.getFileDescriptions()[2].label, "itraq4plex_116")
Ejemplo n.º 13
0
  void LabeledPairFinder::run(const vector<ConsensusMap>& input_maps, ConsensusMap& result_map)
  {
    if (input_maps.size() != 1)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "exactly one input map required");
    if (result_map.getFileDescriptions().size() != 2)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "two file descriptions required");
    if (result_map.getFileDescriptions().begin()->second.filename != result_map.getFileDescriptions().rbegin()->second.filename)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the two file descriptions have to contain the same file name");
    checkIds_(input_maps);

    //look up the light and heavy index
    Size light_index = numeric_limits<Size>::max();
    Size heavy_index = numeric_limits<Size>::max();
    for (ConsensusMap::FileDescriptions::const_iterator it = result_map.getFileDescriptions().begin();
         it != result_map.getFileDescriptions().end();
         ++it)
    {
      if (it->second.label == "heavy")
      {
        heavy_index = it->first;
      }
      else if (it->second.label == "light")
      {
        light_index = it->first;
      }
    }
    if (light_index == numeric_limits<Size>::max() || heavy_index == numeric_limits<Size>::max())
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the input maps have to be labeled 'light' and 'heavy'");
    }

    result_map.clear(false);

    // sort consensus features by RT (and MZ) to speed up searching afterwards
    typedef ConstRefVector<ConsensusMap> RefMap;
    RefMap model_ref(input_maps[0].begin(), input_maps[0].end());
    model_ref.sortByPosition();

    //calculate matches
    ConsensusMap matches;
    //settings
    double rt_pair_dist = param_.getValue("rt_pair_dist");
    double rt_dev_low = param_.getValue("rt_dev_low");
    double rt_dev_high = param_.getValue("rt_dev_high");
    double mz_dev = param_.getValue("mz_dev");
    DoubleList mz_pair_dists = param_.getValue("mz_pair_dists");
    bool mrm = param_.getValue("mrm").toBool();

    //estimate RT parameters
    if (param_.getValue("rt_estimate") == "true")
    {
      //find all possible RT distances of features with the same charge and a good m/z distance
      vector<double> dists;
      dists.reserve(model_ref.size());
      for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
      {
        for (RefMap::const_iterator it2 = model_ref.begin(); it2 != model_ref.end(); ++it2)
        {
          for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
          {
            double mz_pair_dist = *dist_it;
            if (it2->getCharge() == it->getCharge()
               && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev
               && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev)
            {
              dists.push_back(it2->getRT() - it->getRT());
            }
          }
        }
      }
      if (dists.empty())
      {
        cout << "Warning: Could not find pairs for RT distance estimation. The manual settings are used!" << endl;
      }
      else
      {
        if (dists.size() < 50)
        {
          cout << "Warning: Found only " << dists.size() << " pairs. The estimated shift and std deviation are probably not reliable!" << endl;
        }
        //--------------------------- estimate initial parameters of fit ---------------------------
        GaussFitter::GaussFitResult result(-1, -1, -1);
        //first estimate of the optimal shift: median of the distances
        sort(dists.begin(), dists.end());
        Size median_index = dists.size() / 2;
        result.x0 = dists[median_index];
        //create histogram of distances
        //consider only the maximum of pairs, centered around the optimal shift
        Size max_pairs = model_ref.size() / 2;
        Size start_index = (Size) max((SignedSize)0, (SignedSize)(median_index - max_pairs / 2));
        Size end_index = (Size) min((SignedSize)(dists.size() - 1), (SignedSize)(median_index + max_pairs / 2));
        double start_value = dists[start_index];
        double end_value = dists[end_index];
        double bin_step = fabs(end_value - start_value) / 99.999; //ensure that we have 100 bins
        Math::Histogram<> hist(start_value, end_value, bin_step);
        //std::cout << "HIST from " << start_value << " to " << end_value << " (bin size " << bin_step << ")" << endl;
        for (Size i = start_index; i <= end_index; ++i)
        {
          hist.inc(dists[i]);
        }
        //cout << hist << endl;
        dists.clear();
        //determine median of bins (uniform background distribution)
        vector<Size> bins(hist.begin(), hist.end());
        sort(bins.begin(), bins.end());
        Size bin_median = bins[bins.size() / 2];
        bins.clear();
        //estimate scale A: maximum of the histogram
        Size max_value = hist.maxValue();
        result.A = max_value - bin_median;
        //overwrite estimate of x0 with the position of the highest bin
        for (Size i = 0; i < hist.size(); ++i)
        {
          if (hist[i] == max_value)
          {
            result.x0 = hist.centerOfBin(i);
            break;
          }
        }
        //estimate sigma: first time the count is less or equal the median count in the histogram
        double pos = result.x0;
        while (pos > start_value && hist.binValue(pos) > bin_median)
        {
          pos -= bin_step;
        }
        double sigma_low =  result.x0 - pos;
        pos = result.x0;
        while (pos<end_value&& hist.binValue(pos)> bin_median)
        {
          pos += bin_step;
        }
        double sigma_high = pos - result.x0;
        result.sigma = (sigma_high + sigma_low) / 6.0;
        //cout << "estimated optimal RT distance (before fit): " << result.x0 << endl;
        //cout << "estimated allowed deviation (before fit): " << result.sigma*3.0 << endl;
        //--------------------------- do gauss fit ---------------------------
        vector<DPosition<2> > points(hist.size());
        for (Size i = 0; i < hist.size(); ++i)
        {
          points[i][0] = hist.centerOfBin(i);
          points[i][1] = max(0u, hist[i]);
        }
        GaussFitter fitter;
        fitter.setInitialParameters(result);
        result = fitter.fit(points);
        cout << "estimated optimal RT distance: " << result.x0 << endl;
        cout << "estimated allowed deviation: " << fabs(result.sigma) * 3.0 << endl;
        rt_pair_dist = result.x0;
        rt_dev_low = fabs(result.sigma) * 3.0;
        rt_dev_high = fabs(result.sigma) * 3.0;
      }
    }


    // check each feature
    for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
    {
      for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
      {
        double mz_pair_dist = *dist_it;
        RefMap::const_iterator it2 = lower_bound(model_ref.begin(), model_ref.end(), it->getRT() + rt_pair_dist - rt_dev_low, ConsensusFeature::RTLess());
        while (it2 != model_ref.end() && it2->getRT() <= it->getRT() + rt_pair_dist + rt_dev_high)
        {
          // if in mrm mode, we need to compare precursor mass difference and fragment mass difference, charge remains the same

          double prec_mz_diff(0);
          if (mrm)
          {
            prec_mz_diff = fabs((double)it2->getMetaValue("MZ") - (double)it->getMetaValue("MZ"));
            if (it->getCharge() != 0)
            {
              prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist / it->getCharge());
            }
            else
            {
              prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist);
            }
          }

          bool mrm_correct_dist(false);
          double frag_mz_diff = fabs(it->getMZ() - it2->getMZ());

          //cerr << it->getRT() << " charge1=" << it->getCharge() << ", charge2=" << it2->getCharge() << ", prec_diff=" << prec_mz_diff << ", frag_diff=" << frag_mz_diff << endl;

          if (mrm &&
              it2->getCharge() == it->getCharge() &&
              prec_mz_diff < mz_dev &&
              (frag_mz_diff < mz_dev || fabs(frag_mz_diff - mz_pair_dist) < mz_dev))
          {
            mrm_correct_dist = true;
            //cerr << "mrm_correct_dist" << endl;
          }

          if ((mrm && mrm_correct_dist) || (!mrm &&
                                            it2->getCharge() == it->getCharge() &&
                                            it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev &&
                                            it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev
                                            ))
          {
            //cerr << "dist correct" << endl;
            double score = sqrt(
              PValue_(it2->getMZ() - it->getMZ(), mz_pair_dist / it->getCharge(), mz_dev, mz_dev) *
              PValue_(it2->getRT() - it->getRT(), rt_pair_dist, rt_dev_low, rt_dev_high)
              );

            // Note: we used to copy the id from the light feature here, but that strategy does not generalize to more than two labels.
            // We might want to report consensus features where the light one is missing but more than one heavier variant was found.
            // Also, the old strategy is inconsistent with what was done in the unlabeled case.  Thus now we assign a new unique id here.
            matches.push_back(ConsensusFeature());
            matches.back().setUniqueId();

            matches.back().insert(light_index, *it);
            matches.back().clearMetaInfo();
            matches.back().insert(heavy_index, *it2);
            matches.back().setQuality(score);
            matches.back().setCharge(it->getCharge());
            matches.back().computeMonoisotopicConsensus();
          }
          ++it2;
        }
      }
    }

    //compute best pairs
    // - sort matches by quality
    // - take highest-quality matches first (greedy) and mark them as used
    set<Size> used_features;
    matches.sortByQuality(true);
    for (ConsensusMap::const_iterator match = matches.begin(); match != matches.end(); ++match)
    {
      //check if features are not used yet
      if (used_features.find(match->begin()->getUniqueId()) == used_features.end() &&
          used_features.find(match->rbegin()->getUniqueId()) == used_features.end()
          )
      {
        //if unused, add it to the final set of elements
        result_map.push_back(*match);
        used_features.insert(match->begin()->getUniqueId());
        used_features.insert(match->rbegin()->getUniqueId());
      }
    }

    //Add protein identifications to result map
    for (Size i = 0; i < input_maps.size(); ++i)
    {
      result_map.getProteinIdentifications().insert(result_map.getProteinIdentifications().end(), input_maps[i].getProteinIdentifications().begin(), input_maps[i].getProteinIdentifications().end());
    }

    //Add unassigned peptide identifications to result map
    for (Size i = 0; i < input_maps.size(); ++i)
    {
      result_map.getUnassignedPeptideIdentifications().insert(result_map.getUnassignedPeptideIdentifications().end(), input_maps[i].getUnassignedPeptideIdentifications().begin(), input_maps[i].getUnassignedPeptideIdentifications().end());
    }

    // Very useful for checking the results, and the ids have no real meaning anyway
    result_map.sortByMZ();
  }
Ejemplo n.º 14
0
  void EDTAFile::load(const String& filename, ConsensusMap& consensus_map)
  {
    // load input
    TextFile input(filename);
    TextFile::ConstIterator input_it = input.begin();

    // reset map
    consensus_map = ConsensusMap();
    consensus_map.setUniqueId();

    char separator = ' ';
    if (input_it->hasSubstring("\t"))
      separator = '\t';
    else if (input_it->hasSubstring(" "))
      separator = ' ';
    else if (input_it->hasSubstring(","))
      separator = ',';

    // parsing header line
    std::vector<String> headers;
    input_it->split(separator, headers);
    int offset = 0;
    for (Size i = 0; i < headers.size(); ++i)
    {
      headers[i].trim();
    }
    String header_trimmed = *input.begin();
    header_trimmed.trim();

    enum
    {
      TYPE_UNDEFINED,
      TYPE_OLD_NOCHARGE,
      TYPE_OLD_CHARGE,
      TYPE_CONSENSUS
    }
    input_type = TYPE_UNDEFINED;
    Size input_features = 1;

    double rt = 0.0;
    double mz = 0.0;
    double it = 0.0;
    Int ch = 0;

    if (headers.size() <= 2)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "'  (line 1)\n");
    }
    else if (headers.size() == 3)
      input_type = TYPE_OLD_NOCHARGE;
    else if (headers.size() == 4)
      input_type = TYPE_OLD_CHARGE;

    // see if we have a header
    try
    {
      // try to convert... if not: thats a header
      rt = headers[0].toDouble();
      mz = headers[1].toDouble();
      it = headers[2].toDouble();
    }
    catch (Exception::BaseException&)
    {
      offset = 1;
      ++input_it;
      LOG_INFO << "Detected a header line.\n";
    }

    if (headers.size() >= 5)
    {
      if (String(headers[4].trim()).toUpper() == "RT1")
        input_type = TYPE_CONSENSUS;
      else
        input_type = TYPE_OLD_CHARGE;
    }
    if (input_type == TYPE_CONSENSUS)
    {
      // Every consensus style line includes features with four columns.
      // The remainder is meta data
      input_features = headers.size() / 4;
    }

    if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS))
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "'  (line 1)\n");
    }

    SignedSize input_size = input.end() - input.begin();

    ConsensusMap::FileDescription desc;
    desc.filename = filename;
    desc.size = (input_size) - offset;
    consensus_map.getFileDescriptions()[0] = desc;

    // parsing features
    consensus_map.reserve(input_size);

    for (; input_it != input.end(); ++input_it)
    {
      //do nothing for empty lines
      String line_trimmed = *input_it;
      line_trimmed.trim();
      if (line_trimmed == "")
      {
        if ((input_it - input.begin()) < input_size - 1) LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ").";
        continue;
      }

      //split line to tokens
      std::vector<String> parts;
      input_it->split(separator, parts);

      //abort if line does not contain enough fields
      if (parts.size() < 3)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                    String("Failed parsing in line ")
                                    + String((input_it - input.begin()) + 1)
                                    + ": At least three columns are needed! (got  "
                                    + String(parts.size())
                                    + ")\nOffending line: '"
                                    + line_trimmed
                                    + "'  (line "
                                    + String((input_it - input.begin()) + 1)
                                    + ")\n");
      }

      ConsensusFeature cf;
      cf.setUniqueId();

      try
      {
        // Convert values. Will return -1 if not available.
        rt = checkedToDouble_(parts, 0);
        mz = checkedToDouble_(parts, 1);
        it = checkedToDouble_(parts, 2);
        ch = checkedToInt_(parts, 3);

        cf.setRT(rt);
        cf.setMZ(mz);
        cf.setIntensity(it);
        if (input_type != TYPE_OLD_NOCHARGE)
          cf.setCharge(ch);
      }
      catch (Exception::BaseException&)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "'  (line " + String((input_it - input.begin()) + 1) + ")\n");
      }

      // Check all features in one line
      for (Size j = 1; j < input_features; ++j)
      {
        try
        {
          Feature f;
          f.setUniqueId();

          // Convert values. Will return -1 if not available.
          rt = checkedToDouble_(parts, j * 4 + 0);
          mz = checkedToDouble_(parts, j * 4 + 1);
          it = checkedToDouble_(parts, j * 4 + 2);
          ch = checkedToInt_(parts, j * 4 + 3);

          // Only accept features with at least RT and MZ set
          if (rt != -1 && mz != -1)
          {
            f.setRT(rt);
            f.setMZ(mz);
            f.setIntensity(it);
            f.setCharge(ch);

            cf.insert(j - 1, f);
          }
        }
        catch (Exception::BaseException&)
        {
          throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1) + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "'  (line " + String((input_it - input.begin()) + 1) + ")\n");
        }
      }

      //parse meta data
      for (Size j = input_features * 4; j < parts.size(); ++j)
      {
        String part_trimmed = parts[j];
        part_trimmed.trim();
        if (part_trimmed != "")
        {
          //check if column name is ok
          if (headers.size() <= j || headers[j] == "")
          {
            throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                        String("Error: Missing meta data header for column ") + (j + 1) + "!"
                                        + String("Offending header line: '") + header_trimmed + "'  (line 1)");
          }
          //add meta value
          cf.setMetaValue(headers[j], part_trimmed);
        }
      }

      //insert feature to map
      consensus_map.push_back(cf);
    }

    // register FileDescriptions
    ConsensusMap::FileDescription fd;
    fd.filename = filename;
    fd.size = consensus_map.size();
    Size maps = std::max(input_features - 1, Size(1)); // its either a simple feature or a consensus map
    // (in this case the 'input_features' includes the centroid, which we do not count)
    for (Size i = 0; i < maps; ++i)
    {
      fd.label = String("EDTA_Map ") + String(i);
      consensus_map.getFileDescriptions()[i] = fd;
    }

  }
Ejemplo n.º 15
0
START_SECTION((void load(const String &filename, ConsensusMap & map)))
ConsensusMap map;
ConsensusXMLFile file;
file.load(OPENMS_GET_TEST_DATA_PATH("ConsensusXMLFile_1.consensusXML"), map);

//test DocumentIdentifier addition
TEST_STRING_EQUAL(map.getLoadedFilePath(), OPENMS_GET_TEST_DATA_PATH("ConsensusXMLFile_1.consensusXML"));
TEST_STRING_EQUAL(FileTypes::typeToName(map.getLoadedFileType()), "consensusXML");

//meta data
TEST_EQUAL(map.getIdentifier(), "lsid")
TEST_EQUAL(map.getExperimentType() == "label-free", true)
TEST_EQUAL(map.getMetaValue("name1") == DataValue("value1"), true)
TEST_EQUAL(map.getMetaValue("name2") == DataValue(2), true)
//file descriptions
TEST_EQUAL(map.getFileDescriptions()[0].filename == "data/MapAlignmentFeatureMap1.xml", true)
TEST_EQUAL(map.getFileDescriptions()[0].label, "label")
TEST_EQUAL(map.getFileDescriptions()[0].size, 144)
TEST_EQUAL(map.getFileDescriptions()[0].getMetaValue("name3") == DataValue("value3"), true)
TEST_EQUAL(map.getFileDescriptions()[0].getMetaValue("name4") == DataValue(4), true)
TEST_STRING_EQUAL(map.getFileDescriptions()[1].filename, "data/MapAlignmentFeatureMap2.xml")
TEST_EQUAL(map.getFileDescriptions()[1].label, "")
TEST_EQUAL(map.getFileDescriptions()[1].size, 0)
TEST_EQUAL(map.getFileDescriptions()[1].getMetaValue("name5") == DataValue("value5"), true)
TEST_EQUAL(map.getFileDescriptions()[1].getMetaValue("name6") == DataValue(6.0), true)
//data processing
TEST_EQUAL(map.getDataProcessing().size(), 2)
TEST_STRING_EQUAL(map.getDataProcessing()[0].getSoftware().getName(), "Software1")
TEST_STRING_EQUAL(map.getDataProcessing()[0].getSoftware().getVersion(), "0.91a")
TEST_EQUAL(map.getDataProcessing()[0].getProcessingActions().size(), 1)
TEST_EQUAL(map.getDataProcessing()[0].getProcessingActions().count(DataProcessing::DEISOTOPING), 1)
FGA* nullPointer = 0;
START_SECTION((FeatureGroupingAlgorithm()))
	ptr = new FGA();
	TEST_NOT_EQUAL(ptr, nullPointer)
END_SECTION

START_SECTION((virtual ~FeatureGroupingAlgorithm()))
	delete ptr;
END_SECTION

START_SECTION((virtual void group(const vector< FeatureMap > &maps, ConsensusMap &out)=0))
	FGA fga;
	vector< FeatureMap > in;
	ConsensusMap map;
	fga.group(in,map);
	TEST_EQUAL(map.getFileDescriptions()[0].filename, "bla")
END_SECTION

START_SECTION((static void registerChildren()))
{
	TEST_STRING_EQUAL(Factory<FeatureGroupingAlgorithm>::registeredProducts()[0],FeatureGroupingAlgorithmLabeled::getProductName());
	TEST_STRING_EQUAL(Factory<FeatureGroupingAlgorithm>::registeredProducts()[1],FeatureGroupingAlgorithmUnlabeled::getProductName());
	TEST_EQUAL(Factory<FeatureGroupingAlgorithm>::registeredProducts().size(), 3)
}
END_SECTION

START_SECTION((void transferSubelements(const vector<ConsensusMap>& maps, ConsensusMap& out) const))
{
	vector<ConsensusMap> maps(2);
	maps[0].getFileDescriptions()[0].filename = "file1";
	maps[0].getFileDescriptions()[0].size = 1;
			void group(const vector< FeatureMap >&, ConsensusMap& map)
			{
			  map.getFileDescriptions()[0].filename = "bla";
				map.getFileDescriptions()[0].size = 5;
			}
Ejemplo n.º 18
0
  void ProteinInference::infer_(ConsensusMap & consensus_map,
                                const size_t protein_idenfication_index,
                                const UInt reference_map)
  {

    ProteinIdentification & protein_ident = consensus_map.getProteinIdentifications()[protein_idenfication_index];
    for (size_t i = 0; i < protein_ident.getHits().size(); ++i)
    {
      // Protein Accession
      String accession = protein_ident.getHits()[i].getAccession();

      // consensus feature -> peptide hit
      Map<size_t, PeptideHit> consensus_to_peptide;

      // search for it in consensus elements:
      for (size_t i_cm = 0; i_cm < consensus_map.size(); ++i_cm)
      {
        std::vector<PeptideHit> peptide_hits;
        for (std::vector<PeptideIdentification>::iterator it_pepid = consensus_map[i_cm].getPeptideIdentifications().begin();
             it_pepid != consensus_map[i_cm].getPeptideIdentifications().end();
             ++it_pepid)
        {
          // are Protein- and PeptideIdentification from the same search engine run?
          if (it_pepid->getIdentifier() != protein_ident.getIdentifier())
            continue;

          std::vector<PeptideHit> peptide_hits_local;

          it_pepid->getReferencingHits(accession, peptide_hits_local);

          if (peptide_hits_local.empty())
            continue;

          if (sortByUnique_(peptide_hits_local, it_pepid->isHigherScoreBetter())) // we found a unique peptide
          {
            peptide_hits.push_back(peptide_hits_local[0]);
          }

        }

        // if several PeptideIdentifications (==Spectra) were assigned to current ConsensusElement
        // --> take the best (as above), e.g. in SILAC this could happen
        // TODO: better idea?
        if (!peptide_hits.empty())
        {
          if (sortByUnique_(peptide_hits, consensus_map[i_cm].getPeptideIdentifications()[0].isHigherScoreBetter())) //found a unique peptide for current ConsensusElement
          {
            consensus_to_peptide[i_cm] = peptide_hits[0];
#ifdef DEBUG_INFERENCE
            std::cout << "assign peptide " <<  peptide_hits[0].getSequence() << " to Protein " << accession << std::endl;
#endif
          }
        }

      }       // ! ConsensusMap loop

      // no peptides found that match current Protein
      if (consensus_to_peptide.empty())
        continue;

      // Use all matching ConsensusElements to derive a quantitation for current protein
      // build up ratios for every map vs reference
      double coverage = 0;
      Map<Size, std::vector<IntensityType> > ratios;

      // number of unique peptides pointing to current protein
      UInt coverage_count = (UInt)consensus_to_peptide.size();

      for (Map<size_t, PeptideHit>::iterator it_pephits = consensus_to_peptide.begin();
           it_pephits != consensus_to_peptide.end();
           ++it_pephits)
      {
        coverage += it_pephits->second.getSequence().size();
        const ConsensusFeature::HandleSetType & handles = consensus_map[it_pephits->first].getFeatures();
        //search if reference is present
        ConsensusFeature::HandleSetType::const_iterator it_ref = handles.end();
        for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin();
             it != handles.end();
             ++it)
        {
          if (it->getMapIndex() == reference_map)
          {
            it_ref = it;
            break;
          }
        }

        // did not find a reference
        // TODO assume intensity==0 instead??
        if (it_ref == handles.end())
          continue;

        for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin();
             it != handles.end();
             ++it)
        {
          ratios[it->getMapIndex()].push_back(it->getIntensity() / it_ref->getIntensity());
        }

      }

      // sort ratios map-wise and take median
      for (ConsensusMap::FileDescriptions::const_iterator it_file = consensus_map.getFileDescriptions().begin();
           it_file != consensus_map.getFileDescriptions().end();
           ++it_file)
      {
        if (ratios.has(it_file->first))
        {
          //sort intensity ratios for map #it_file->first
          std::sort(ratios[it_file->first].begin(), ratios[it_file->first].end());
          //take median
          IntensityType protein_ratio = ratios[it_file->first][ratios[it_file->first].size() / 2];

          //TODO if ratios have high variance emit a warning!

          protein_ident.getHits()[i].setMetaValue(String("ratio_") + String(it_file->first), protein_ratio);
        }

      }       // ! map loop

      // % coverage of protein by peptides
      coverage /= DoubleReal(protein_ident.getHits()[i].getSequence().size()) / 100;

      protein_ident.getHits()[i].setMetaValue("coverage", coverage);
      protein_ident.getHits()[i].setMetaValue("hits", coverage_count);

    }     // ! Protein loop



    // protein_to_peptides now contains the Protein -> Peptides mapping
    // lets estimate the

  }
  void FeatureGroupingAlgorithmUnlabeled::group(const std::vector<FeatureMap> & maps, ConsensusMap & out)
  {
    // check that the number of maps is ok
    if (maps.size() < 2)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "At least two maps must be given!");
    }

    // define reference map (the one with most peaks)
    Size reference_map_index = 0;
    Size max_count = 0;
    for (Size m = 0; m < maps.size(); ++m)
    {
      if (maps[m].size() > max_count)
      {
        max_count = maps[m].size();
        reference_map_index = m;
      }
    }

    std::vector<ConsensusMap> input(2);

    // build a consensus map of the elements of the reference map (contains only singleton consensus elements)
    MapConversion::convert(reference_map_index, maps[reference_map_index],
                          input[0]);

    // loop over all other maps, extend the groups
    StablePairFinder pair_finder;
    pair_finder.setParameters(param_.copy("", true));

    for (Size i = 0; i < maps.size(); ++i)
    {
      if (i != reference_map_index)
      {
        MapConversion::convert(i, maps[i], input[1]);
        // compute the consensus of the reference map and map i
        ConsensusMap result;
        pair_finder.run(input, result);
        input[0].swap(result);
      }
    }

    // replace result with temporary map
    out.swap(input[0]);
    // copy back the input maps (they have been deleted while swapping)
    out.getFileDescriptions() = input[0].getFileDescriptions();

    // add protein IDs and unassigned peptide IDs to the result map here,
    // to keep the same order as the input maps (useful for output later)
    for (std::vector<FeatureMap>::const_iterator map_it = maps.begin();
         map_it != maps.end(); ++map_it)
    {
      // add protein identifications to result map
      out.getProteinIdentifications().insert(
        out.getProteinIdentifications().end(),
        map_it->getProteinIdentifications().begin(),
        map_it->getProteinIdentifications().end());

      // add unassigned peptide identifications to result map
      out.getUnassignedPeptideIdentifications().insert(
        out.getUnassignedPeptideIdentifications().end(),
        map_it->getUnassignedPeptideIdentifications().begin(),
        map_it->getUnassignedPeptideIdentifications().end());
    }

    // canonical ordering for checking the results, and the ids have no real meaning anyway
#if 1 // the way this was done in DelaunayPairFinder and StablePairFinder
    out.sortByMZ();
#else
    out.sortByQuality();
    out.sortByMaps();
    out.sortBySize();
#endif

    return;
  }
    ExitCodes main_(int, const char **)
    {
      String in = getStringOption_("in");
      StringList out = getStringList_("out");
      SeedListGenerator seed_gen;
      // results (actually just one result, except for consensusXML input):
      Map<UInt64, SeedListGenerator::SeedList> seed_lists;

      Size num_maps = 0;
      FileTypes::Type in_type = FileHandler::getType(in);

      if (in_type == FileTypes::CONSENSUSXML)
      {
        ConsensusMap consensus;
        ConsensusXMLFile().load(in, consensus);
        num_maps = consensus.getFileDescriptions().size();
        if (out.size() != num_maps)
        {
          writeLog_("Error: expected " + String(num_maps) +
                    " output filenames");
          return ILLEGAL_PARAMETERS;
        }
        seed_gen.generateSeedLists(consensus, seed_lists);
      }
      else if (out.size() > 1)
      {
        writeLog_("Error: expected only one output filename");
        return ILLEGAL_PARAMETERS;
      }
      else if (in_type == FileTypes::MZML)
      {
        MSExperiment<> experiment;
        MzMLFile().load(in, experiment);
        seed_gen.generateSeedList(experiment, seed_lists[0]);
      }
      else if (in_type == FileTypes::IDXML)
      {
        vector<ProteinIdentification> proteins;
        vector<PeptideIdentification> peptides;
        IdXMLFile().load(in, proteins, peptides);
        seed_gen.generateSeedList(peptides, seed_lists[0],
                                  getFlag_("use_peptide_mass"));
      }
      else if (in_type == FileTypes::FEATUREXML)
      {
        FeatureMap features;
        FeatureXMLFile().load(in, features);
        seed_gen.generateSeedList(
          features.getUnassignedPeptideIdentifications(), seed_lists[0]);
      }

      // output:
      num_maps = 0;
      for (Map<UInt64, SeedListGenerator::SeedList>::Iterator it =
             seed_lists.begin(); it != seed_lists.end(); ++it, ++num_maps)
      {
        FeatureMap features;
        seed_gen.convertSeedList(it->second, features);
        //annotate output with data processing info:
        addDataProcessing_(features, getProcessingInfo_(
                             DataProcessing::DATA_PROCESSING));
        FeatureXMLFile().store(out[num_maps], features);
      }

      return EXECUTION_OK;
    }