Beispiel #1
0
  void LowessSmoothing::smoothData(const DoubleVector& input_x, const DoubleVector& input_y, DoubleVector& smoothed_output)
  {
    if (input_x.size() != input_y.size())
    {
      throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
          "Sizes of x and y values not equal! Aborting... ", String(input_x.size()));
    }

    // unable to smooth over 2 or less data points (we need at least 3)
    if (input_x.size() <= 2)
    {
      smoothed_output = input_y;
      return;
    }

    Size input_size = input_y.size();

    // const Size q = floor( input_size * alpha );
    const Size q = (window_size_ < input_size) ? static_cast<Size>(window_size_) : input_size - 1;

    DoubleVector distances(input_size, 0.0);
    DoubleVector sortedDistances(input_size, 0.0);

    for (Size outer_idx = 0; outer_idx < input_size; ++outer_idx)
    {
      // Compute distances.
      // Size inner_idx = 0;
      for (Size inner_idx = 0; inner_idx < input_size; ++inner_idx)
      {
        distances[inner_idx] = std::fabs(input_x[outer_idx] - input_x[inner_idx]);
        sortedDistances[inner_idx] = distances[inner_idx];
      }

      // Sort distances in order from smallest to largest.
      std::sort(sortedDistances.begin(), sortedDistances.end());

      // Compute weigths.
      std::vector<double> weigths(input_size, 0);
      for (Size inner_idx = 0; inner_idx < input_size; ++inner_idx)
      {
        weigths.at(inner_idx) = tricube_(distances[inner_idx], sortedDistances[q]);
      }

      //calculate regression
      Math::QuadraticRegression qr;
      std::vector<double>::const_iterator w_begin = weigths.begin();
      qr.computeRegressionWeighted(input_x.begin(), input_x.end(), input_y.begin(), w_begin);

      //smooth y-values
      double rt = input_x[outer_idx];
      smoothed_output.push_back(qr.eval(rt));
    }

    return;
  }
Beispiel #2
0
  void TOFCalibration::calculateCalibCoeffs_(MSExperiment<> & calib_spectra)
  {
    // flight times are needed later
    calib_peaks_ft_ = calib_spectra;


    // convert flight times of peaks into m/z values
    applyTOFConversion_(calib_spectra);
    std::vector<std::vector<unsigned int> > monoiso_peaks;
    getMonoisotopicPeaks_(calib_spectra, monoiso_peaks);

    startProgress(0, calib_spectra.size(), "quadratic fitting of calibrant spectra");
    // do the quadratic fitting for each calibration spectra separately
    for (unsigned int spec = 0; spec < calib_spectra.size(); ++spec)
    {
      std::vector<unsigned int> monoiso_peaks_scan;
      std::vector<double> exp_masses;
      // match the m/z-values to the expected masses
      matchMasses_(calib_spectra, monoiso_peaks, monoiso_peaks_scan, exp_masses, spec);

      // the actual quadratic fitting part
      Size n = exp_masses.size();
      if (n < 3)
      {
        continue;
      }

      // matrix containing the observations
      std::vector<double> x;
      // vector containing the expected masses
      std::vector<double> y;

      for (Size i = 0; i < n; i++)
      {
        // get the flight time
        double xi = ((calib_peaks_ft_.begin() + spec)->begin() + monoiso_peaks_scan[i])->getMZ();
        x.push_back(xi);
        y.push_back(exp_masses[i]);
      }

      Math::QuadraticRegression qr;
      qr.computeRegression(x.begin(), x.end(), y.begin());

#ifdef DEBUG_CALIBRATION
      std::cout << "chi^2: " << qr.getChiSquared() << std::endl;//DEBUG
      std::cout << "a: " << qr.getA() << "b: " << qr.getB()
            << "c: " << qr.getC() << std::endl;//DEBUG
#endif
      // store the coefficients
      coeff_quad_fit_.push_back(qr.getA());
      coeff_quad_fit_.push_back(qr.getB());
      coeff_quad_fit_.push_back(qr.getC());

      // determine the errors in ppm
      for (Size p = 0; p < n; ++p)
      {
#ifdef DEBUG_CALIBRATION
        std::cout << exp_masses[p]
                  << "\t" << mQ_(calib_peaks_ft_[spec][monoiso_peaks_scan[p]].getMZ(), spec) - exp_masses[p] << std::endl;
#endif
        errors_[exp_masses[p]].push_back((mQ_(calib_peaks_ft_[spec][monoiso_peaks_scan[p]].getMZ(), spec) - exp_masses[p]));
      }
      setProgress(spec);
    }
    endProgress();

    if (coeff_quad_fit_.empty())
    {
      String mess = String("Data can't be calibrated, not enough reference masses found: ") + coeff_quad_fit_.size() / 3;
      throw Exception::UnableToCalibrate(__FILE__, __LINE__, __PRETTY_FUNCTION__, "UnableToCalibrate", mess.c_str());
    }
    averageErrors_();
    averageCoefficients_();
  }
Beispiel #3
0
  bool MZTrafoModel::train( std::vector<double> obs_mz, std::vector<double> theo_mz, std::vector<double> weights, MODELTYPE md, bool use_RANSAC )
  {
    coeff_.clear();

    if (obs_mz.empty())
    {
      //LOG_ERROR << "Input to calibration model is empty!" << std::endl;
      return false;
    }

    if (use_RANSAC)
    {
      if (ransac_params_ == nullptr)
      {
        throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "TrafoModel::train(): no RANSAC parameters were set before calling train(). Internal error!");
      }
      if (!(md == LINEAR || md == QUADRATIC))
      {
        LOG_ERROR << "RANSAC is implemented for LINEAR and QUADRATIC models only! Please disable RANSAC or choose the LINEAR or QUADRATIC model." << std::endl;
        throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
      }
    }

    try
    {
      if (md == LINEAR)
      {
        if (obs_mz.size() < 2) return false;

        if (use_RANSAC && 
          (obs_mz.size() > ransac_params_->n)) // with fewer points, RANSAC will fail
        {
          std::vector<std::pair<double, double> > r, pairs;
          for (Size i = 0; i < obs_mz.size(); ++i)
          {
            pairs.push_back(std::make_pair(theo_mz[i], obs_mz[i]));
          }
          r = Math::RANSAC<Math::RansacModelLinear>().ransac(pairs, *ransac_params_);
          if (r.size() < 2)
          {
            return false; // RANSAC failed
          }
          obs_mz.clear();
          theo_mz.clear();
          for (Size i = 0; i < r.size(); ++i)
          {
            theo_mz.push_back(r[i].first);
            obs_mz.push_back(r[i].second);
          }
        }

        double confidence_interval_P(0.0);
        Math::LinearRegression lr;
        lr.computeRegression(confidence_interval_P, theo_mz.begin(), theo_mz.end(), obs_mz.begin(), false);
        coeff_.push_back(lr.getIntercept());
        coeff_.push_back(lr.getSlope());
        coeff_.push_back(0.0);
      }
      else if (md == LINEAR_WEIGHTED)
      {
        if (obs_mz.size() < 2) return false;

        double confidence_interval_P(0.0);
        Math::LinearRegression lr;
        lr.computeRegressionWeighted(confidence_interval_P, theo_mz.begin(), theo_mz.end(), obs_mz.begin(), weights.begin(), false);
        coeff_.push_back(lr.getIntercept());
        coeff_.push_back(lr.getSlope());
        coeff_.push_back(0.0);
      }
      else if (md == QUADRATIC)
      {
        if (obs_mz.size() < 3) return false;

        if (use_RANSAC && 
          (obs_mz.size() > ransac_params_->n)) // with fewer points, RANSAC will fail
        {

          std::vector<std::pair<double, double> > r, pairs;
          for (Size i = 0; i < obs_mz.size(); ++i)
          {
            pairs.push_back(std::make_pair(theo_mz[i], obs_mz[i]));
          }
          r = Math::RANSAC<Math::RansacModelQuadratic>().ransac(pairs, *ransac_params_);
          obs_mz.clear();
          theo_mz.clear();
          for (Size i = 0; i < r.size(); ++i)
          {
            theo_mz.push_back(r[i].first);
            obs_mz.push_back(r[i].second);
          }
        }
        // Quadratic fit
        Math::QuadraticRegression qr;
        qr.computeRegression(theo_mz.begin(), theo_mz.end(), obs_mz.begin());
        coeff_.push_back(qr.getA());
        coeff_.push_back(qr.getB());
        coeff_.push_back(qr.getC());
      }
      else if (md == QUADRATIC_WEIGHTED)
      {
        if (obs_mz.size() < 3) return false;

        // Quadratic fit (weighted)
        Math::QuadraticRegression qr;
        qr.computeRegressionWeighted(theo_mz.begin(), theo_mz.end(), obs_mz.begin(), weights.begin());
        coeff_.push_back(qr.getA());
        coeff_.push_back(qr.getB());
        coeff_.push_back(qr.getC());
      }

#ifdef DEBUG_TRAFOMODEL
      printf("# mz regression parameters: Y = %3.10f + %3.10f X + %3.10f X^2\n",
        coeff_[0],
        coeff_[1],
        coeff_[2]);

      // print results
      std::cout << "Calibration details:\n\n";
      std::cout << "m/z(theo) m/z(obs) ppm(before) | ppm(after)\n";
      std::vector<double> st_ppm_before, st_ppm_after;
      for (Size i = 0; i < obs_mz.size(); i++)
      {
        if (use_ppm_) st_ppm_before.push_back(obs_mz[i]);
        else st_ppm_before.push_back(Math::getPPM(theo_mz[i], obs_mz[i]));

        double obs_mz_v = obs_mz[i];
        if (use_ppm_) obs_mz_v = Math::ppmToMass(obs_mz_v, theo_mz[i]) + theo_mz[i]; //

        st_ppm_after.push_back(Math::getPPM(theo_mz[i], predict(obs_mz_v))); // predict() is ppm-aware itself

        printf("%4.5f  %4.5f  %2.1f | %2.1f\n", theo_mz[i], obs_mz_v, st_ppm_before.back(), st_ppm_after.back());
      }
      // use median and MAD to ignore outliers
      double m = Math::median(st_ppm_before.begin(), st_ppm_before.end());
      std::cout << "ppm before: median = " << m << "  MAD = " << Math::MAD(st_ppm_before.begin(), st_ppm_before.end(), m) << "\n";
      m = Math::median(st_ppm_after.begin(), st_ppm_after.end()); 
      std::cout << "ppm after : median = " << m << "  MAD = " << Math::MAD(st_ppm_after.begin(), st_ppm_after.end(), m) << "\n";
#endif

      return true;
    }
    catch (Exception::BaseException& /*e*/)
    {
      //LOG_ERROR << "Exception during model fitting: " << e.getMessage() << std::endl;
      return false;
    }
  }