void MapAlignmentTransformer::transformSingleConsensusMap(ConsensusMap & cmap,
                                                            const TransformationDescription & trafo)
  {
    for (ConsensusMap::Iterator cmit = cmap.begin(); cmit != cmap.end();
         ++cmit)
    {
      applyToConsensusFeature_(*cmit, trafo);
    }

    // adapt RT values of unassigned peptides:
    if (!cmap.getUnassignedPeptideIdentifications().empty())
    {
      transformSinglePeptideIdentification(
        cmap.getUnassignedPeptideIdentifications(), trafo);
    }
  }
  void FeatureGroupingAlgorithmQT::group_(const vector<MapType>& maps,
                                          ConsensusMap& out)
  {
    // check that the number of maps is ok:
    if (maps.size() < 2)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                       "At least two maps must be given!");
    }

    QTClusterFinder cluster_finder;
    cluster_finder.setParameters(param_.copy("", true));

    cluster_finder.run(maps, out);

    StringList ms_run_locations;

    // add protein IDs and unassigned peptide IDs to the result map here,
    // to keep the same order as the input maps (useful for output later):
    for (typename vector<MapType>::const_iterator map_it = maps.begin();
         map_it != maps.end(); ++map_it)
    {      
      // add protein identifications to result map:
      out.getProteinIdentifications().insert(
        out.getProteinIdentifications().end(),
        map_it->getProteinIdentifications().begin(),
        map_it->getProteinIdentifications().end());

      // add unassigned peptide identifications to result map:
      out.getUnassignedPeptideIdentifications().insert(
        out.getUnassignedPeptideIdentifications().end(),
        map_it->getUnassignedPeptideIdentifications().begin(),
        map_it->getUnassignedPeptideIdentifications().end());
    }

    // canonical ordering for checking the results:
    out.sortByQuality();
    out.sortByMaps();
    out.sortBySize();
    return;
  }
Beispiel #3
0
  void MetaDataBrowser::add(ConsensusMap & map)
  {
    //identifier
    add(static_cast<DocumentIdentifier &>(map));

    // protein identifications
    for (Size i = 0; i < map.getProteinIdentifications().size(); ++i)
    {
      add(map.getProteinIdentifications()[i]);
    }

    //unassigned peptide ids
    for (Size i = 0; i < map.getUnassignedPeptideIdentifications().size(); ++i)
    {
      add(map.getUnassignedPeptideIdentifications()[i]);
    }

    add(static_cast<MetaInfoInterface &>(map));

    treeview_->expandItem(treeview_->findItems(QString::number(0), Qt::MatchExactly, 1).first());
  }
Beispiel #4
0
  void IDMapper::annotate(ConsensusMap & map, const std::vector<PeptideIdentification> & ids, const std::vector<ProteinIdentification> & protein_ids, bool measure_from_subelements)
  {
    // validate "RT" and "MZ" metavalues exist
    checkHits_(ids);

    //append protein identifications to Map
    map.getProteinIdentifications().insert(map.getProteinIdentifications().end(), protein_ids.begin(), protein_ids.end());

    //keep track of assigned/unassigned peptide identifications
    std::map<Size, Size> assigned;

    // store which peptides fit which feature (and avoid double entries)
    // consensusMap -> {peptide_index}
    std::vector<std::set<size_t> > mapping(map.size());

    DoubleList mz_values;
    DoubleReal rt_pep;
    IntList charges;

    //iterate over the peptide IDs
    for (Size i = 0; i < ids.size(); ++i)
    {
      if (ids[i].getHits().empty())
        continue;

      getIDDetails_(ids[i], rt_pep, mz_values, charges);

      //iterate over the features
      for (Size cm_index = 0; cm_index < map.size(); ++cm_index)
      {
        // if set to TRUE, we leave the i_mz-loop as we added the whole ID with all hits
        bool was_added = false;       // was current pep-m/z matched?!

        // iterate over m/z values of pepIds
        for (Size i_mz = 0; i_mz < mz_values.size(); ++i_mz)
        {
          DoubleReal mz_pep = mz_values[i_mz];

          // charge states to use for checking:
          IntList current_charges;
          if (!ignore_charge_)
          {
            // if "mz_ref." is "precursor", we have only one m/z value to check,
            // but still one charge state per peptide hit that could match:
            if (mz_values.size() == 1)
            {
              current_charges = charges;
            }
            else
            {
              current_charges.push_back(charges[i_mz]);
            }
            current_charges.push_back(0);             // "not specified" always matches
          }

          //check if we compare distance from centroid or subelements
          if (!measure_from_subelements)
          {
            if (isMatch_(rt_pep - map[cm_index].getRT(), mz_pep, map[cm_index].getMZ()) && (ignore_charge_ || ListUtils::contains(current_charges, map[cm_index].getCharge())))
            {
              was_added = true;
              map[cm_index].getPeptideIdentifications().push_back(ids[i]);
              ++assigned[i];
            }
          }
          else
          {
            for (ConsensusFeature::HandleSetType::const_iterator it_handle = map[cm_index].getFeatures().begin();
                 it_handle != map[cm_index].getFeatures().end();
                 ++it_handle)
            {
              if (isMatch_(rt_pep - it_handle->getRT(), mz_pep, it_handle->getMZ())  && (ignore_charge_ || ListUtils::contains(current_charges, it_handle->getCharge())))
              {
                was_added = true;
                if (mapping[cm_index].count(i) == 0)
                {
                  map[cm_index].getPeptideIdentifications().push_back(ids[i]);
                  ++assigned[i];
                  mapping[cm_index].insert(i);
                }
                break;                 // we added this peptide already.. no need to check other handles
              }
            }
            // continue to here
          }

          if (was_added)
            break;

        }         // m/z values to check

        // break to here

      }       // features
    }     // Identifications


    Size matches_none(0);
    Size matches_single(0);
    Size matches_multi(0);

    //append unassigned peptide identifications
    for (Size i = 0; i < ids.size(); ++i)
    {
      if (assigned[i] == 0)
      {
        map.getUnassignedPeptideIdentifications().push_back(ids[i]);
        ++matches_none;
      }
      else if (assigned[i] == 1)
      {
        ++matches_single;
      }
      else if (assigned[i] > 1)
      {
        ++matches_multi;
      }
    }

    //some statistics output
    LOG_INFO << "Unassigned peptides: " << matches_none << "\n"
             << "Peptides assigned to exactly one feature: "
             << matches_single << "\n"
             << "Peptides assigned to multiple features: "
             << matches_multi << std::endl;

  }
  ExitCodes main_(int, const char **)
  {
    FeatureGroupingAlgorithmUnlabeled * algorithm = new FeatureGroupingAlgorithmUnlabeled();

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    StringList ins;
    ins = getStringList_("in");
    String out = getStringOption_("out");

    //-------------------------------------------------------------
    // check for valid input
    //-------------------------------------------------------------
    // check if all input files have the correct type
    FileTypes::Type file_type = FileHandler::getType(ins[0]);
    for (Size i = 0; i < ins.size(); ++i)
    {
      if (FileHandler::getType(ins[i]) != file_type)
      {
        writeLog_("Error: All input files must be of the same type!");
        return ILLEGAL_PARAMETERS;
      }
    }

    //-------------------------------------------------------------
    // set up algorithm
    //-------------------------------------------------------------
    Param algorithm_param = getParam_().copy("algorithm:", true);
    writeDebug_("Used algorithm parameters", algorithm_param, 3);
    algorithm->setParameters(algorithm_param);

    Size reference_index(0);
    //-------------------------------------------------------------
    // perform grouping
    //-------------------------------------------------------------
    // load input
    ConsensusMap out_map;
    StringList ms_run_locations;
    if (file_type == FileTypes::FEATUREXML)
    {
      // use map with highest number of features as reference:
      Size max_count(0);
      FeatureXMLFile f;
      for (Size i = 0; i < ins.size(); ++i)
      {
        Size s = f.loadSize(ins[i]);
        if (s > max_count)
        {
          max_count = s;
          reference_index = i;
        }
      }

      // Load reference map and input it to the algorithm
      UInt64 ref_id;
      Size ref_size;
      std::vector<PeptideIdentification> ref_pepids;
      std::vector<ProteinIdentification> ref_protids;
      {
        FeatureMap map_ref;
        FeatureXMLFile f_fxml_tmp;
        f_fxml_tmp.getOptions().setLoadConvexHull(false);
        f_fxml_tmp.getOptions().setLoadSubordinates(false);
        f_fxml_tmp.load(ins[reference_index], map_ref);
        algorithm->setReference(reference_index, map_ref);
        ref_id = map_ref.getUniqueId();
        ref_size = map_ref.size();
        ref_pepids = map_ref.getUnassignedPeptideIdentifications();
        ref_protids = map_ref.getProteinIdentifications();
      }

      ConsensusMap dummy;
      // go through all input files and add them to the result one by one
      for (Size i = 0; i < ins.size(); ++i)
      {

        FeatureXMLFile f_fxml_tmp;
        FeatureMap tmp_map;
        f_fxml_tmp.getOptions().setLoadConvexHull(false);
        f_fxml_tmp.getOptions().setLoadSubordinates(false);
        f_fxml_tmp.load(ins[i], tmp_map);

        // copy over information on the primary MS run
        StringList ms_runs;
        tmp_map.getPrimaryMSRunPath(ms_runs);
        ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

        if (i != reference_index)
        {
          algorithm->addToGroup(i, tmp_map);

          // store some meta-data about the maps in the "dummy" object -> try to
          // keep the same order as they were given in the input independent of
          // which map is the reference.

          dummy.getFileDescriptions()[i].filename = ins[i];
          dummy.getFileDescriptions()[i].size = tmp_map.size();
          dummy.getFileDescriptions()[i].unique_id = tmp_map.getUniqueId();

          // add protein identifications to result map
          dummy.getProteinIdentifications().insert(
            dummy.getProteinIdentifications().end(),
            tmp_map.getProteinIdentifications().begin(),
            tmp_map.getProteinIdentifications().end());

          // add unassigned peptide identifications to result map
          dummy.getUnassignedPeptideIdentifications().insert(
            dummy.getUnassignedPeptideIdentifications().end(),
            tmp_map.getUnassignedPeptideIdentifications().begin(),
            tmp_map.getUnassignedPeptideIdentifications().end());
        }
        else
        {
          // copy the meta-data from the refernce map
          dummy.getFileDescriptions()[i].filename = ins[i];
          dummy.getFileDescriptions()[i].size = ref_size;
          dummy.getFileDescriptions()[i].unique_id = ref_id;

          // add protein identifications to result map
          dummy.getProteinIdentifications().insert(
            dummy.getProteinIdentifications().end(),
            ref_protids.begin(),
            ref_protids.end());

          // add unassigned peptide identifications to result map
          dummy.getUnassignedPeptideIdentifications().insert(
            dummy.getUnassignedPeptideIdentifications().end(),
            ref_pepids.begin(),
            ref_pepids.end());
        }
      }

      // get the resulting map
      out_map = algorithm->getResultMap();

      //
      // Copy back meta-data (Protein / Peptide ids / File descriptions)
      //

      // add protein identifications to result map
      out_map.getProteinIdentifications().insert(
        out_map.getProteinIdentifications().end(),
        dummy.getProteinIdentifications().begin(),
        dummy.getProteinIdentifications().end());

      // add unassigned peptide identifications to result map
      out_map.getUnassignedPeptideIdentifications().insert(
        out_map.getUnassignedPeptideIdentifications().end(),
        dummy.getUnassignedPeptideIdentifications().begin(),
        dummy.getUnassignedPeptideIdentifications().end());

      out_map.setFileDescriptions(dummy.getFileDescriptions());

      // canonical ordering for checking the results, and the ids have no real meaning anyway
      // the way this was done in DelaunayPairFinder and StablePairFinder
      // -> the same ordering as FeatureGroupingAlgorithmUnlabeled::group applies!
      out_map.sortByMZ();
      out_map.updateRanges();
    }
    else
    {
      vector<ConsensusMap> maps(ins.size());
      ConsensusXMLFile f;
      for (Size i = 0; i < ins.size(); ++i)
      {
        f.load(ins[i], maps[i]);
        StringList ms_runs;
        maps[i].getPrimaryMSRunPath(ms_runs);
        ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
      }
      // group
      algorithm->FeatureGroupingAlgorithm::group(maps, out_map);

      // set file descriptions:
      bool keep_subelements = getFlag_("keep_subelements");
      if (!keep_subelements)
      {
        for (Size i = 0; i < ins.size(); ++i)
        {
          out_map.getFileDescriptions()[i].filename = ins[i];
          out_map.getFileDescriptions()[i].size = maps[i].size();
          out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
        }
      }
      else
      {
        // components of the output map are not the input maps themselves, but
        // the components of the input maps:
        algorithm->transferSubelements(maps, out_map);
      }
    }

    // assign unique ids
    out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

    // annotate output with data processing info
    addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

    out_map.setPrimaryMSRunPath(ms_run_locations);
    // write output
    ConsensusXMLFile().store(out, out_map);

    // some statistics
    map<Size, UInt> num_consfeat_of_size;
    for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit)
    {
      ++num_consfeat_of_size[cmit->size()];
    }

    LOG_INFO << "Number of consensus features:" << endl;
    for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
    {
      LOG_INFO << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
    }
    LOG_INFO << "  total:      " << setw(6) << out_map.size() << endl;

    delete algorithm;

    return EXECUTION_OK;
  }
  void FeatureGroupingAlgorithmUnlabeled::group(const std::vector<FeatureMap> & maps, ConsensusMap & out)
  {
    // check that the number of maps is ok
    if (maps.size() < 2)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "At least two maps must be given!");
    }

    // define reference map (the one with most peaks)
    Size reference_map_index = 0;
    Size max_count = 0;
    for (Size m = 0; m < maps.size(); ++m)
    {
      if (maps[m].size() > max_count)
      {
        max_count = maps[m].size();
        reference_map_index = m;
      }
    }

    std::vector<ConsensusMap> input(2);

    // build a consensus map of the elements of the reference map (contains only singleton consensus elements)
    MapConversion::convert(reference_map_index, maps[reference_map_index],
                          input[0]);

    // loop over all other maps, extend the groups
    StablePairFinder pair_finder;
    pair_finder.setParameters(param_.copy("", true));

    for (Size i = 0; i < maps.size(); ++i)
    {
      if (i != reference_map_index)
      {
        MapConversion::convert(i, maps[i], input[1]);
        // compute the consensus of the reference map and map i
        ConsensusMap result;
        pair_finder.run(input, result);
        input[0].swap(result);
      }
    }

    // replace result with temporary map
    out.swap(input[0]);
    // copy back the input maps (they have been deleted while swapping)
    out.getFileDescriptions() = input[0].getFileDescriptions();

    // add protein IDs and unassigned peptide IDs to the result map here,
    // to keep the same order as the input maps (useful for output later)
    for (std::vector<FeatureMap>::const_iterator map_it = maps.begin();
         map_it != maps.end(); ++map_it)
    {
      // add protein identifications to result map
      out.getProteinIdentifications().insert(
        out.getProteinIdentifications().end(),
        map_it->getProteinIdentifications().begin(),
        map_it->getProteinIdentifications().end());

      // add unassigned peptide identifications to result map
      out.getUnassignedPeptideIdentifications().insert(
        out.getUnassignedPeptideIdentifications().end(),
        map_it->getUnassignedPeptideIdentifications().begin(),
        map_it->getUnassignedPeptideIdentifications().end());
    }

    // canonical ordering for checking the results, and the ids have no real meaning anyway
#if 1 // the way this was done in DelaunayPairFinder and StablePairFinder
    out.sortByMZ();
#else
    out.sortByQuality();
    out.sortByMaps();
    out.sortBySize();
#endif

    return;
  }
  void LabeledPairFinder::run(const vector<ConsensusMap>& input_maps, ConsensusMap& result_map)
  {
    if (input_maps.size() != 1)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "exactly one input map required");
    if (result_map.getFileDescriptions().size() != 2)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "two file descriptions required");
    if (result_map.getFileDescriptions().begin()->second.filename != result_map.getFileDescriptions().rbegin()->second.filename)
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the two file descriptions have to contain the same file name");
    checkIds_(input_maps);

    //look up the light and heavy index
    Size light_index = numeric_limits<Size>::max();
    Size heavy_index = numeric_limits<Size>::max();
    for (ConsensusMap::FileDescriptions::const_iterator it = result_map.getFileDescriptions().begin();
         it != result_map.getFileDescriptions().end();
         ++it)
    {
      if (it->second.label == "heavy")
      {
        heavy_index = it->first;
      }
      else if (it->second.label == "light")
      {
        light_index = it->first;
      }
    }
    if (light_index == numeric_limits<Size>::max() || heavy_index == numeric_limits<Size>::max())
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the input maps have to be labeled 'light' and 'heavy'");
    }

    result_map.clear(false);

    // sort consensus features by RT (and MZ) to speed up searching afterwards
    typedef ConstRefVector<ConsensusMap> RefMap;
    RefMap model_ref(input_maps[0].begin(), input_maps[0].end());
    model_ref.sortByPosition();

    //calculate matches
    ConsensusMap matches;
    //settings
    double rt_pair_dist = param_.getValue("rt_pair_dist");
    double rt_dev_low = param_.getValue("rt_dev_low");
    double rt_dev_high = param_.getValue("rt_dev_high");
    double mz_dev = param_.getValue("mz_dev");
    DoubleList mz_pair_dists = param_.getValue("mz_pair_dists");
    bool mrm = param_.getValue("mrm").toBool();

    //estimate RT parameters
    if (param_.getValue("rt_estimate") == "true")
    {
      //find all possible RT distances of features with the same charge and a good m/z distance
      vector<double> dists;
      dists.reserve(model_ref.size());
      for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
      {
        for (RefMap::const_iterator it2 = model_ref.begin(); it2 != model_ref.end(); ++it2)
        {
          for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
          {
            double mz_pair_dist = *dist_it;
            if (it2->getCharge() == it->getCharge()
               && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev
               && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev)
            {
              dists.push_back(it2->getRT() - it->getRT());
            }
          }
        }
      }
      if (dists.empty())
      {
        cout << "Warning: Could not find pairs for RT distance estimation. The manual settings are used!" << endl;
      }
      else
      {
        if (dists.size() < 50)
        {
          cout << "Warning: Found only " << dists.size() << " pairs. The estimated shift and std deviation are probably not reliable!" << endl;
        }
        //--------------------------- estimate initial parameters of fit ---------------------------
        GaussFitter::GaussFitResult result(-1, -1, -1);
        //first estimate of the optimal shift: median of the distances
        sort(dists.begin(), dists.end());
        Size median_index = dists.size() / 2;
        result.x0 = dists[median_index];
        //create histogram of distances
        //consider only the maximum of pairs, centered around the optimal shift
        Size max_pairs = model_ref.size() / 2;
        Size start_index = (Size) max((SignedSize)0, (SignedSize)(median_index - max_pairs / 2));
        Size end_index = (Size) min((SignedSize)(dists.size() - 1), (SignedSize)(median_index + max_pairs / 2));
        double start_value = dists[start_index];
        double end_value = dists[end_index];
        double bin_step = fabs(end_value - start_value) / 99.999; //ensure that we have 100 bins
        Math::Histogram<> hist(start_value, end_value, bin_step);
        //std::cout << "HIST from " << start_value << " to " << end_value << " (bin size " << bin_step << ")" << endl;
        for (Size i = start_index; i <= end_index; ++i)
        {
          hist.inc(dists[i]);
        }
        //cout << hist << endl;
        dists.clear();
        //determine median of bins (uniform background distribution)
        vector<Size> bins(hist.begin(), hist.end());
        sort(bins.begin(), bins.end());
        Size bin_median = bins[bins.size() / 2];
        bins.clear();
        //estimate scale A: maximum of the histogram
        Size max_value = hist.maxValue();
        result.A = max_value - bin_median;
        //overwrite estimate of x0 with the position of the highest bin
        for (Size i = 0; i < hist.size(); ++i)
        {
          if (hist[i] == max_value)
          {
            result.x0 = hist.centerOfBin(i);
            break;
          }
        }
        //estimate sigma: first time the count is less or equal the median count in the histogram
        double pos = result.x0;
        while (pos > start_value && hist.binValue(pos) > bin_median)
        {
          pos -= bin_step;
        }
        double sigma_low =  result.x0 - pos;
        pos = result.x0;
        while (pos<end_value&& hist.binValue(pos)> bin_median)
        {
          pos += bin_step;
        }
        double sigma_high = pos - result.x0;
        result.sigma = (sigma_high + sigma_low) / 6.0;
        //cout << "estimated optimal RT distance (before fit): " << result.x0 << endl;
        //cout << "estimated allowed deviation (before fit): " << result.sigma*3.0 << endl;
        //--------------------------- do gauss fit ---------------------------
        vector<DPosition<2> > points(hist.size());
        for (Size i = 0; i < hist.size(); ++i)
        {
          points[i][0] = hist.centerOfBin(i);
          points[i][1] = max(0u, hist[i]);
        }
        GaussFitter fitter;
        fitter.setInitialParameters(result);
        result = fitter.fit(points);
        cout << "estimated optimal RT distance: " << result.x0 << endl;
        cout << "estimated allowed deviation: " << fabs(result.sigma) * 3.0 << endl;
        rt_pair_dist = result.x0;
        rt_dev_low = fabs(result.sigma) * 3.0;
        rt_dev_high = fabs(result.sigma) * 3.0;
      }
    }


    // check each feature
    for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
    {
      for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
      {
        double mz_pair_dist = *dist_it;
        RefMap::const_iterator it2 = lower_bound(model_ref.begin(), model_ref.end(), it->getRT() + rt_pair_dist - rt_dev_low, ConsensusFeature::RTLess());
        while (it2 != model_ref.end() && it2->getRT() <= it->getRT() + rt_pair_dist + rt_dev_high)
        {
          // if in mrm mode, we need to compare precursor mass difference and fragment mass difference, charge remains the same

          double prec_mz_diff(0);
          if (mrm)
          {
            prec_mz_diff = fabs((double)it2->getMetaValue("MZ") - (double)it->getMetaValue("MZ"));
            if (it->getCharge() != 0)
            {
              prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist / it->getCharge());
            }
            else
            {
              prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist);
            }
          }

          bool mrm_correct_dist(false);
          double frag_mz_diff = fabs(it->getMZ() - it2->getMZ());

          //cerr << it->getRT() << " charge1=" << it->getCharge() << ", charge2=" << it2->getCharge() << ", prec_diff=" << prec_mz_diff << ", frag_diff=" << frag_mz_diff << endl;

          if (mrm &&
              it2->getCharge() == it->getCharge() &&
              prec_mz_diff < mz_dev &&
              (frag_mz_diff < mz_dev || fabs(frag_mz_diff - mz_pair_dist) < mz_dev))
          {
            mrm_correct_dist = true;
            //cerr << "mrm_correct_dist" << endl;
          }

          if ((mrm && mrm_correct_dist) || (!mrm &&
                                            it2->getCharge() == it->getCharge() &&
                                            it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev &&
                                            it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev
                                            ))
          {
            //cerr << "dist correct" << endl;
            double score = sqrt(
              PValue_(it2->getMZ() - it->getMZ(), mz_pair_dist / it->getCharge(), mz_dev, mz_dev) *
              PValue_(it2->getRT() - it->getRT(), rt_pair_dist, rt_dev_low, rt_dev_high)
              );

            // Note: we used to copy the id from the light feature here, but that strategy does not generalize to more than two labels.
            // We might want to report consensus features where the light one is missing but more than one heavier variant was found.
            // Also, the old strategy is inconsistent with what was done in the unlabeled case.  Thus now we assign a new unique id here.
            matches.push_back(ConsensusFeature());
            matches.back().setUniqueId();

            matches.back().insert(light_index, *it);
            matches.back().clearMetaInfo();
            matches.back().insert(heavy_index, *it2);
            matches.back().setQuality(score);
            matches.back().setCharge(it->getCharge());
            matches.back().computeMonoisotopicConsensus();
          }
          ++it2;
        }
      }
    }

    //compute best pairs
    // - sort matches by quality
    // - take highest-quality matches first (greedy) and mark them as used
    set<Size> used_features;
    matches.sortByQuality(true);
    for (ConsensusMap::const_iterator match = matches.begin(); match != matches.end(); ++match)
    {
      //check if features are not used yet
      if (used_features.find(match->begin()->getUniqueId()) == used_features.end() &&
          used_features.find(match->rbegin()->getUniqueId()) == used_features.end()
          )
      {
        //if unused, add it to the final set of elements
        result_map.push_back(*match);
        used_features.insert(match->begin()->getUniqueId());
        used_features.insert(match->rbegin()->getUniqueId());
      }
    }

    //Add protein identifications to result map
    for (Size i = 0; i < input_maps.size(); ++i)
    {
      result_map.getProteinIdentifications().insert(result_map.getProteinIdentifications().end(), input_maps[i].getProteinIdentifications().begin(), input_maps[i].getProteinIdentifications().end());
    }

    //Add unassigned peptide identifications to result map
    for (Size i = 0; i < input_maps.size(); ++i)
    {
      result_map.getUnassignedPeptideIdentifications().insert(result_map.getUnassignedPeptideIdentifications().end(), input_maps[i].getUnassignedPeptideIdentifications().begin(), input_maps[i].getUnassignedPeptideIdentifications().end());
    }

    // Very useful for checking the results, and the ids have no real meaning anyway
    result_map.sortByMZ();
  }
Beispiel #8
0
  ExitCodes main_(int, const char **)
  {
    String in = getStringOption_("in"), out = getStringOption_("out"),
           id_out = getStringOption_("id_out");

    if (out.empty() && id_out.empty())
    {
      throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__,
                                                 __PRETTY_FUNCTION__,
                                                 "out/id_out");
    }

    vector<ProteinIdentification> proteins;
    vector<PeptideIdentification> peptides;

    FileTypes::Type in_type = FileHandler::getType(in);

    if (in_type == FileTypes::MZML)
    {
      MSExperiment<> experiment;
      MzMLFile().load(in, experiment);
      // what about unassigned peptide IDs?
      for (MSExperiment<>::Iterator exp_it = experiment.begin();
           exp_it != experiment.end(); ++exp_it)
      {
        peptides.insert(peptides.end(),
                        exp_it->getPeptideIdentifications().begin(),
                        exp_it->getPeptideIdentifications().end());
        exp_it->getPeptideIdentifications().clear();
      }
      experiment.getProteinIdentifications().swap(proteins);
      if (!out.empty())
      {
        addDataProcessing_(experiment,
                           getProcessingInfo_(DataProcessing::FILTERING));
        MzMLFile().store(out, experiment);
      }
    }
    else if (in_type == FileTypes::FEATUREXML)
    {
      FeatureMap features;
      FeatureXMLFile().load(in, features);
      features.getUnassignedPeptideIdentifications().swap(peptides);
      for (FeatureMap::Iterator feat_it = features.begin();
           feat_it != features.end(); ++feat_it)
      {
        peptides.insert(peptides.end(),
                        feat_it->getPeptideIdentifications().begin(),
                        feat_it->getPeptideIdentifications().end());
        feat_it->getPeptideIdentifications().clear();
      }
      features.getProteinIdentifications().swap(proteins);
      if (!out.empty())
      {
        addDataProcessing_(features,
                           getProcessingInfo_(DataProcessing::FILTERING));
        FeatureXMLFile().store(out, features);
      }
    }
    else         // consensusXML
    {
      ConsensusMap consensus;
      ConsensusXMLFile().load(in, consensus);
      consensus.getUnassignedPeptideIdentifications().swap(peptides);
      for (ConsensusMap::Iterator cons_it = consensus.begin();
           cons_it != consensus.end(); ++cons_it)
      {
        peptides.insert(peptides.end(),
                        cons_it->getPeptideIdentifications().begin(),
                        cons_it->getPeptideIdentifications().end());
        cons_it->getPeptideIdentifications().clear();
      }
      consensus.getProteinIdentifications().swap(proteins);
      if (!out.empty())
      {
        addDataProcessing_(consensus,
                           getProcessingInfo_(DataProcessing::FILTERING));
        ConsensusXMLFile().store(out, consensus);
      }
    }

    if (!id_out.empty())
    {
      // IDMapper can match a peptide ID to several overlapping features,
      // resulting in duplicates; this shouldn't be the case for peak data
      if (in_type != FileTypes::MZML) removeDuplicates_(peptides);
      IdXMLFile().store(id_out, proteins, peptides);
    }

    return EXECUTION_OK;
  }
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[0].getSequence(), "ABCDEFG")
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[1].getSequence(), "HIJKLMN")
TEST_EQUAL(map.getProteinIdentifications()[1].getHits().size(), 1)
TEST_EQUAL(map.getProteinIdentifications()[1].getHits()[0].getSequence(), "OPQREST")
//peptide identifications
TEST_EQUAL(map[0].getPeptideIdentifications().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), "A")
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[0].getSequence(), "C")
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[1].getSequence(), "D")
TEST_EQUAL(map[1].getPeptideIdentifications().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits()[0].getSequence(), "E")
//unassigned peptide identifications
TEST_EQUAL(map.getUnassignedPeptideIdentifications().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits()[0].getSequence(), "F")
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[0].getSequence(), "G")
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[1].getSequence(), "H")

//features
TEST_EQUAL(map.size(), 6)
ConsensusFeature cons_feature = map[0];
TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27)
TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47)
TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47)
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[0].getSequence(), "ABCDEFG")
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[1].getSequence(), "HIJKLMN")
TEST_EQUAL(map.getProteinIdentifications()[1].getHits().size(), 1)
TEST_EQUAL(map.getProteinIdentifications()[1].getHits()[0].getSequence(), "OPQREST")
//peptide identifications
TEST_EQUAL(map[0].getPeptideIdentifications().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("A"))
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[0].getSequence(), AASequence::fromString("C"))
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[1].getSequence(), AASequence::fromString("D"))
TEST_EQUAL(map[1].getPeptideIdentifications().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits()[0].getSequence(),AASequence::fromString( "E"))
//unassigned peptide identifications
TEST_EQUAL(map.getUnassignedPeptideIdentifications().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("F"))
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[0].getSequence(), AASequence::fromString("G"))
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[1].getSequence(), AASequence::fromString("H"))

//features
TEST_EQUAL(map.size(), 6)
ConsensusFeature cons_feature = map[0];
TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27)
TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47)
TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47)