void MapAlignmentAlgorithmPoseClustering::align(const ConsensusMap & map, TransformationDescription & trafo)
  {
    // TODO: move this to updateMembers_? (if consensusMap prevails)
    // TODO: why does superimposer work on consensus map???
    const ConsensusMap & map_model = reference_;
    ConsensusMap map_scene = map;

    // run superimposer to find the global transformation
    TransformationDescription si_trafo;
    superimposer_.run(map_model, map_scene, si_trafo);

    // apply transformation to consensus features and contained feature
    // handles
    for (Size j = 0; j < map_scene.size(); ++j)
    {
      //Calculate new RT
      double rt = map_scene[j].getRT();
      rt = si_trafo.apply(rt);
      //Set RT of consensus feature centroid
      map_scene[j].setRT(rt);
      //Set RT of consensus feature handles
      map_scene[j].begin()->asMutable().setRT(rt);
    }

    //run pairfinder to find pairs
    ConsensusMap result;
    //TODO: add another 2map interface to pairfinder?
    std::vector<ConsensusMap> input(2);
    input[0] = map_model;
    input[1] = map_scene;
    pairfinder_.run(input, result);

    // calculate the local transformation
    si_trafo.invert();         // to undo the transformation applied above
    TransformationDescription::DataPoints data;
    for (ConsensusMap::Iterator it = result.begin(); it != result.end();
         ++it)
    {
      if (it->size() == 2)           // two matching features
      {
        ConsensusFeature::iterator feat_it = it->begin();
        double y = feat_it->getRT();
        double x = si_trafo.apply((++feat_it)->getRT());
        // one feature should be from the reference map:
        if (feat_it->getMapIndex() != 0)
        {
          data.push_back(make_pair(x, y));
        }
        else
        {
          data.push_back(make_pair(y, x));
        }
      }
    }
    trafo = TransformationDescription(data);
    trafo.fitModel("linear");
  }
Ejemplo n.º 2
0
  void InternalCalibration::makeLinearRegression_(std::vector<DoubleReal> & observed_masses, std::vector<DoubleReal> & theoretical_masses)
  {
    if (observed_masses.size() != theoretical_masses.size())
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Number of observed and theoretical masses must agree.");
    }
#ifdef DEBUG_CALIBRATION
    std::ofstream out("calibration_regression.txt");
    std::vector<DoubleReal> rel_errors(observed_masses.size(), 0.);
    // determine rel error in ppm for the two reference masses
    for (Size ref_peak = 0; ref_peak < observed_masses.size(); ++ref_peak)
    {
      rel_errors[ref_peak] = (theoretical_masses[ref_peak] - observed_masses[ref_peak]) / theoretical_masses[ref_peak] * 1e6;

      out << observed_masses[ref_peak] << "\t" << rel_errors[ref_peak] << "\n";
      std::cout << observed_masses[ref_peak] << " " << theoretical_masses[ref_peak] << std::endl;
      // std::cout << observed_masses[ref_peak]<<"\t"<<rel_errors[ref_peak]<<std::endl;
    }
#endif

    TransformationDescription::DataPoints data;
    for (Size i = 0; i < observed_masses.size(); ++i)
    {
      data.push_back(std::make_pair(observed_masses[i],
                                    theoretical_masses[i]));
    }

    trafo_ = TransformationDescription(data);
    trafo_.fitModel("linear", Param());

#ifdef DEBUG_CALIBRATION
    //          std::cout <<"\n\n---------------------------------\n\n"<< "after calibration "<<std::endl;
    for (Size i = 0; i < observed_masses.size(); ++i)
    {
      DoubleReal new_mass = trafo_.apply(observed_masses[i]);

      DoubleReal rel_error = (theoretical_masses[i] - (new_mass)) / theoretical_masses[i] * 1e6;
      std::cout << observed_masses[i] << "\t" << rel_error << std::endl;
    }
#endif
  }
Ejemplo n.º 3
0
  // Evaluates the transformation stored in the file given by option "in" on
  // the grid min, min + step, ... (up to and including max, as far as the
  // floating-point accumulation reaches it).
  //
  // If "max" is not greater than "min", "max" is derived from the largest
  // x value among the data points of the transformation, passed through
  // Math::ceilDecimal (presumably rounding it up - confirm against that
  // helper).
  //
  // Output: if option "out" is empty, "value<TAB>transformed" lines are
  // printed to stdout; otherwise a copy of the input transformation whose
  // data points are replaced by the evaluated grid is stored in "out".
  //
  // @throws Exception::InvalidParameter if "step" is not positive, or if no
  //         "max" greater than "min" could be determined
  // @return EXECUTION_OK on success
  ExitCodes main_(int, const char **)
  {
    String in = getStringOption_("in"), out = getStringOption_("out");

    TransformationDescription trafo_in;
    TransformationXMLFile().load(in, trafo_in);
    TransformationDescription::DataPoints data;

    DoubleReal min = getDoubleOption_("min"), max = getDoubleOption_("max"),
               step = getDoubleOption_("step");
    // a zero, negative or NaN step would never move "value" past "max",
    // i.e. the evaluation loop below would not terminate
    if (!(step > 0))
    {
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "'step' must be positive");
    }
    if (max <= min)
    {
      data = trafo_in.getDataPoints();
      // without data points there is nothing to derive "max" from (and
      // back() must not be called on an empty container); "max" then stays
      // as it is and the check below reports the problem
      if (!data.empty())
      {
        sort(data.begin(), data.end());
        max = data.back().first;   // largest x value after sorting
        DoubleReal magnitude = floor(log10(max));
        max = Math::ceilDecimal(max, magnitude - 1);
      }
      if (max <= min)
      {
        throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "'min' must be lower than 'max'");
      }
    }

    // "data" is reused to collect the evaluated grid
    data.clear();
    for (DoubleReal value = min; value <= max; value += step)
    {
      DoubleReal transformed = trafo_in.apply(value);
      if (out.empty())
      {
        cout << value << '\t' << transformed << endl;
      }
      else data.push_back(make_pair(value, transformed));
    }

    if (!out.empty())
    {
      // keep everything else of the input transformation, only swap the data points
      TransformationDescription trafo_out(trafo_in);
      trafo_out.setDataPoints(data);
      TransformationXMLFile().store(out, trafo_out);
    }

    return EXECUTION_OK;
  }
  // Builds one RT transformation (as a set of data points) per input run.
  //
  // Outline:
  //  1. compute a median RT per peptide sequence for every run;
  //  2. determine the reference RT scale: either the already filled member
  //     "reference_" (filtered in place to sequences seen in enough runs),
  //     or - if "reference_" is empty - the median of the per-run medians
  //     (stored temporarily in "reference_" and cleared again at the end);
  //  3. derive the maximum allowed RT shift from parameter "max_rt_shift"
  //     (values <= 1 are scaled by the reference RT range, 0 means no limit);
  //  4. for every run, collect (run RT, reference RT) points of sequences
  //     found in the reference whose RT difference is within the limit.
  //     If "reference_index_" denotes one of the positions, an "identity"
  //     transformation is inserted there.
  //
  // @param rt_data    per-run RT data, forwarded to computeMedians_
  // @param transforms output: cleared, then filled with one entry per run
  //                   (plus the identity entry for an internal reference)
  // @param sorted     forwarded to computeMedians_
  // @throws Exception::MissingInformation if no reference RT is left after
  //         filtering
  void MapAlignmentAlgorithmIdentification::computeTransformations_(
    vector<SeqToList>& rt_data, vector<TransformationDescription>& transforms,
    bool sorted)
  {
    Int size = rt_data.size(); // not Size because we compare to Ints later
    transforms.clear();

    // filter RT data (remove peptides that elute in several fractions):
    // TODO

    // compute RT medians:
    LOG_DEBUG << "Computing RT medians..." << endl;
    vector<SeqToValue> medians_per_run(size);
    for (Int i = 0; i < size; ++i)
    {
      computeMedians_(rt_data[i], medians_per_run[i], sorted);
    }
    // regroup: sequence -> list of its per-run medians
    SeqToList medians_per_seq;
    for (vector<SeqToValue>::iterator run_it = medians_per_run.begin();
         run_it != medians_per_run.end(); ++run_it)
    {
      for (SeqToValue::iterator med_it = run_it->begin();
           med_it != run_it->end(); ++med_it)
      {
        medians_per_seq[med_it->first].push_back(med_it->second);
      }
    }

    // get reference retention time scale: either directly from reference file,
    // or compute consensus time scale
    bool reference_given = !reference_.empty(); // reference file given
    if (reference_given)
    {
      // remove peptides that don't occur in enough runs:
      LOG_DEBUG << "Removing peptides that occur in too few runs..." << endl;
      SeqToValue temp;
      for (SeqToValue::iterator ref_it = reference_.begin();
           ref_it != reference_.end(); ++ref_it)
      {
        SeqToList::iterator med_it = medians_per_seq.find(ref_it->first);
        // "+ 1": the reference itself counts as one occurrence
        if ((med_it != medians_per_seq.end()) &&
            (med_it->second.size() + 1 >= min_run_occur_))
        {
          temp.insert(temp.end(), *ref_it); // new items should go at the end
        }
      }
      LOG_DEBUG << "Removed " << reference_.size() - temp.size() << " of "
                << reference_.size() << " peptides." << endl;
      temp.swap(reference_);
    }
    else // compute overall RT median per sequence (median of medians per run)
    {
      LOG_DEBUG << "Computing overall RT medians per sequence..." << endl;

      // remove peptides that don't occur in enough runs (at least two):
      LOG_DEBUG << "Removing peptides that occur in too few runs..." << endl;
      SeqToList temp;
      for (SeqToList::iterator med_it = medians_per_seq.begin();
           med_it != medians_per_seq.end(); ++med_it)
      {
        if (med_it->second.size() >= min_run_occur_)
        {
          temp.insert(temp.end(), *med_it);
        }
      }
      LOG_DEBUG << "Removed " << medians_per_seq.size() - temp.size() << " of "
                << medians_per_seq.size() << " peptides." << endl;
      temp.swap(medians_per_seq);
      computeMedians_(medians_per_seq, reference_);
    }
    if (reference_.empty())
    {
      throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "No reference RT information left after filtering");
    }

    double max_rt_shift = param_.getValue("max_rt_shift");
    if (max_rt_shift <= 1)
    {
      // compute max. allowed shift from overall retention time range:
      double rt_min = numeric_limits<double>::infinity(), rt_max = -rt_min;
      for (SeqToValue::iterator it = reference_.begin(); it != reference_.end();
           ++it)
      {
        rt_min = min(rt_min, it->second);
        rt_max = max(rt_max, it->second);
      }
      double rt_range = rt_max - rt_min;
      max_rt_shift *= rt_range;
      // in the degenerate case of only one reference point, "max_rt_shift"
      // should be zero (because "rt_range" is zero) - this is covered below
    }
    if (max_rt_shift == 0)
    {
      // zero means "no limit"
      max_rt_shift = numeric_limits<double>::max();
    }
    LOG_DEBUG << "Max. allowed RT shift (in seconds): " << max_rt_shift << endl;

    // generate RT transformations:
    LOG_DEBUG << "Generating RT transformations..." << endl;
    LOG_INFO << "\nAlignment based on:" << endl; // diagnostic output
    Size offset = 0; // offset in case of internal reference
    // one extra iteration so that a reference at the last position is handled
    for (Int i = 0; i < size + 1; ++i)
    {
      if (i == reference_index_)
      {
        // if one of the input maps was used as reference, it has been skipped
        // so far - now we have to consider it again:
        TransformationDescription trafo;
        trafo.fitModel("identity");
        transforms.push_back(trafo);
        LOG_INFO << "- " << reference_.size() << " data points for sample "
                 << i + 1 << " (reference)\n";
        offset = 1;
      }
      if (i >= size) break;

      // to be useful for the alignment, a peptide sequence has to occur in the
      // current run ("medians_per_run[i]"), but also in at least one other run
      // ("medians_overall"):
      TransformationDescription::DataPoints data;
      Size n_outliers = 0;
      for (SeqToValue::iterator med_it = medians_per_run[i].begin();
           med_it != medians_per_run[i].end(); ++med_it)
      {
        SeqToValue::const_iterator pos = reference_.find(med_it->first);
        if (pos != reference_.end())
        {
          // std::abs (not unqualified abs): guarantees the floating-point
          // overload, so the RT difference can never be truncated to int
          if (std::abs(med_it->second - pos->second) <= max_rt_shift)
          { // found, and satisfies "max_rt_shift" condition!
            TransformationDescription::DataPoint point(med_it->second,
                                                       pos->second, pos->first);
            data.push_back(point);
          }
          else
          {
            n_outliers++;
          }
        }
      }
      transforms.push_back(TransformationDescription(data));
      LOG_INFO << "- " << data.size() << " data points for sample "
               << i + offset + 1;
      if (n_outliers)
      {
        LOG_INFO << " (" << n_outliers << " outliers removed)";
      }
      LOG_INFO << "\n";
    }
    LOG_INFO << endl;

    // delete temporary reference
    if (!reference_given) reference_.clear();
  }
  // Builds one RT transformation (as a set of data points) per input run.
  //
  // Outline:
  //  1. compute a median RT per peptide sequence for every run;
  //  2. determine the reference RT scale: either the already filled member
  //     "reference_" (filtered in place to sequences seen in enough runs),
  //     or - if "reference_" is empty - the median of the per-run medians
  //     (stored temporarily in "reference_" and cleared again at the end);
  //  3. derive the maximum allowed RT shift from parameter "max_rt_shift"
  //     (values <= 1 are scaled by the reference RT range, 0 means no limit);
  //  4. for every run, collect (run RT, reference RT) points of sequences
  //     found in the reference whose RT difference is within the limit.
  //     At position "reference_index_ - 1" an "identity" transformation is
  //     inserted for the internal reference.
  //
  // If no reference RT is left after filtering, every run ends up with an
  // empty set of data points.
  //
  // @param rt_data    per-run RT data, forwarded to computeMedians_
  // @param transforms output: cleared, then filled with one entry per run
  //                   (plus the identity entry for an internal reference)
  // @param sorted     forwarded to computeMedians_
  void MapAlignmentAlgorithmIdentification::computeTransformations_(
    vector<SeqToList> & rt_data, vector<TransformationDescription> & transforms,
    bool sorted)
  {
    Size size = rt_data.size();
    transforms.clear();

    // filter RT data (remove peptides that elute in several fractions):
    // TODO

    // compute RT medians:
    LOG_DEBUG << "Computing RT medians..." << endl;
    vector<SeqToValue> medians_per_run(size);
    for (Size i = 0; i < size; ++i)
    {
      computeMedians_(rt_data[i], medians_per_run[i], sorted);
    }
    // regroup: sequence -> list of its per-run medians
    SeqToList medians_per_seq;
    for (vector<SeqToValue>::iterator run_it = medians_per_run.begin();
         run_it != medians_per_run.end(); ++run_it)
    {
      for (SeqToValue::iterator med_it = run_it->begin();
           med_it != run_it->end(); ++med_it)
      {
        medians_per_seq[med_it->first] << med_it->second;
      }
    }

    // get reference retention time scale: either directly from reference file,
    // or compute consensus time scale
    bool reference_given = !reference_.empty();     // reference file given
    if (reference_given)
    {
      // remove peptides that don't occur in enough runs:
      LOG_DEBUG << "Removing peptides that occur in too few runs..." << endl;
      SeqToValue temp;
      SeqToValue::iterator pos = temp.begin();       // to prevent segfault below
      for (SeqToValue::iterator ref_it = reference_.begin();
           ref_it != reference_.end(); ++ref_it)
      {
        SeqToList::iterator med_it = medians_per_seq.find(ref_it->first);
        // "+ 1": the reference itself counts as one occurrence
        if ((med_it != medians_per_seq.end()) &&
            (med_it->second.size() + 1 >= min_run_occur_))
        {
          temp.insert(pos, *ref_it);
          pos = --temp.end();           // would cause segfault if "temp" was empty
        }
      }
      temp.swap(reference_);
    }
    else     // compute overall RT median per sequence (median of medians per run)
    {
      LOG_DEBUG << "Computing overall RT medians per sequence..." << endl;

      // remove peptides that don't occur in enough runs (at least two):
      LOG_DEBUG << "Removing peptides that occur in too few runs..." << endl;
      SeqToList temp;
      SeqToList::iterator pos = temp.begin();       // to prevent segfault below
      for (SeqToList::iterator med_it = medians_per_seq.begin();
           med_it != medians_per_seq.end(); ++med_it)
      {
        if (med_it->second.size() >= min_run_occur_)
        {
          temp.insert(pos, *med_it);
          pos = --temp.end();           // would cause segfault if "temp" was empty
        }
      }
      temp.swap(medians_per_seq);
      computeMedians_(medians_per_seq, reference_);
    }

    DoubleReal max_rt_shift = param_.getValue("max_rt_shift");
    // values <= 1 are a fraction of the overall reference RT range; the range
    // can only be computed if there is at least one reference point
    // (begin() of an empty container must not be dereferenced or incremented)
    if ((max_rt_shift <= 1) && !reference_.empty())
    {
      DoubleReal rt_min = reference_.begin()->second, rt_max = rt_min;
      for (SeqToValue::iterator it = ++reference_.begin();
           it != reference_.end(); ++it)
      {
        rt_min = min(rt_min, it->second);
        rt_max = max(rt_max, it->second);
      }
      DoubleReal rt_range = rt_max - rt_min;
      max_rt_shift *= rt_range;
      // in the degenerate case of only one reference point, "max_rt_shift"
      // is zero now (because "rt_range" is zero) - this is covered below
    }
    if (max_rt_shift == 0)
    {
      // zero means "no limit"; checked after the scaling so that a zero RT
      // range does not turn a fractional setting into "exact matches only"
      max_rt_shift = numeric_limits<DoubleReal>::max();
    }
    LOG_DEBUG << "Max. allowed RT shift (in seconds): " << max_rt_shift << endl;

    // generate RT transformations:
    LOG_DEBUG << "Generating RT transformations..." << endl;
    LOG_INFO << "\nAlignment based on:" << endl;     // diagnostic output
    // one extra iteration so that a reference at the last position is handled
    for (Size i = 0, offset = 0; i < size + 1; ++i)
    {
      if (i == reference_index_ - 1)
      {
        // if one of the input maps was used as reference, it has been skipped
        // so far - now we have to consider it again:
        TransformationDescription trafo;
        trafo.fitModel("identity");
        transforms.push_back(trafo);
        LOG_INFO << "- 0 data points for sample " << i + 1 << " (reference)\n";
        offset = 1;
      }

      if (i >= size)
        break;

      // to be useful for the alignment, a peptide sequence has to occur in the
      // current run ("medians_per_run[i]"), but also in at least one other run
      // ("medians_overall"):
      TransformationDescription::DataPoints data;
      for (SeqToValue::iterator med_it = medians_per_run[i].begin();
           med_it != medians_per_run[i].end(); ++med_it)
      {
        SeqToValue::const_iterator pos = reference_.find(med_it->first);
        if ((pos != reference_.end()) &&
            (fabs(med_it->second - pos->second) <= max_rt_shift))
        {         // found, and satisfies "max_rt_shift" condition!
          data.push_back(make_pair(med_it->second, pos->second));
        }
      }
      transforms.push_back(TransformationDescription(data));
      LOG_INFO << "- " << data.size() << " data points for sample "
               << i + offset + 1 << "\n";
    }
    LOG_INFO << endl;

    // delete temporary reference
    if (!reference_given)
      reference_.clear();

  }