Пример #1
0
  void LowessSmoothing::smoothData(const DoubleVector& input_x, const DoubleVector& input_y, DoubleVector& smoothed_output)
  {
    if (input_x.size() != input_y.size())
    {
      throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
          "Sizes of x and y values not equal! Aborting... ", String(input_x.size()));
    }

    // unable to smooth over 2 or less data points (we need at least 3)
    if (input_x.size() <= 2)
    {
      smoothed_output = input_y;
      return;
    }

    Size input_size = input_y.size();

    // const Size q = floor( input_size * alpha );
    const Size q = (window_size_ < input_size) ? static_cast<Size>(window_size_) : input_size - 1;

    DoubleVector distances(input_size, 0.0);
    DoubleVector sortedDistances(input_size, 0.0);

    for (Size outer_idx = 0; outer_idx < input_size; ++outer_idx)
    {
      // Compute distances.
      // Size inner_idx = 0;
      for (Size inner_idx = 0; inner_idx < input_size; ++inner_idx)
      {
        distances[inner_idx] = std::fabs(input_x[outer_idx] - input_x[inner_idx]);
        sortedDistances[inner_idx] = distances[inner_idx];
      }

      // Sort distances in order from smallest to largest.
      std::sort(sortedDistances.begin(), sortedDistances.end());

      // Compute weigths.
      std::vector<double> weigths(input_size, 0);
      for (Size inner_idx = 0; inner_idx < input_size; ++inner_idx)
      {
        weigths.at(inner_idx) = tricube_(distances[inner_idx], sortedDistances[q]);
      }

      //calculate regression
      Math::QuadraticRegression qr;
      std::vector<double>::const_iterator w_begin = weigths.begin();
      qr.computeRegressionWeighted(input_x.begin(), input_x.end(), input_y.begin(), w_begin);

      //smooth y-values
      double rt = input_x[outer_idx];
      smoothed_output.push_back(qr.eval(rt));
    }

    return;
  }
Пример #2
0
  bool MZTrafoModel::train( std::vector<double> obs_mz, std::vector<double> theo_mz, std::vector<double> weights, MODELTYPE md, bool use_RANSAC )
  {
    coeff_.clear();

    if (obs_mz.empty())
    {
      //LOG_ERROR << "Input to calibration model is empty!" << std::endl;
      return false;
    }

    if (use_RANSAC)
    {
      if (ransac_params_ == nullptr)
      {
        throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "TrafoModel::train(): no RANSAC parameters were set before calling train(). Internal error!");
      }
      if (!(md == LINEAR || md == QUADRATIC))
      {
        LOG_ERROR << "RANSAC is implemented for LINEAR and QUADRATIC models only! Please disable RANSAC or choose the LINEAR or QUADRATIC model." << std::endl;
        throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
      }
    }

    try
    {
      if (md == LINEAR)
      {
        if (obs_mz.size() < 2) return false;

        if (use_RANSAC && 
          (obs_mz.size() > ransac_params_->n)) // with fewer points, RANSAC will fail
        {
          std::vector<std::pair<double, double> > r, pairs;
          for (Size i = 0; i < obs_mz.size(); ++i)
          {
            pairs.push_back(std::make_pair(theo_mz[i], obs_mz[i]));
          }
          r = Math::RANSAC<Math::RansacModelLinear>().ransac(pairs, *ransac_params_);
          if (r.size() < 2)
          {
            return false; // RANSAC failed
          }
          obs_mz.clear();
          theo_mz.clear();
          for (Size i = 0; i < r.size(); ++i)
          {
            theo_mz.push_back(r[i].first);
            obs_mz.push_back(r[i].second);
          }
        }

        double confidence_interval_P(0.0);
        Math::LinearRegression lr;
        lr.computeRegression(confidence_interval_P, theo_mz.begin(), theo_mz.end(), obs_mz.begin(), false);
        coeff_.push_back(lr.getIntercept());
        coeff_.push_back(lr.getSlope());
        coeff_.push_back(0.0);
      }
      else if (md == LINEAR_WEIGHTED)
      {
        if (obs_mz.size() < 2) return false;

        double confidence_interval_P(0.0);
        Math::LinearRegression lr;
        lr.computeRegressionWeighted(confidence_interval_P, theo_mz.begin(), theo_mz.end(), obs_mz.begin(), weights.begin(), false);
        coeff_.push_back(lr.getIntercept());
        coeff_.push_back(lr.getSlope());
        coeff_.push_back(0.0);
      }
      else if (md == QUADRATIC)
      {
        if (obs_mz.size() < 3) return false;

        if (use_RANSAC && 
          (obs_mz.size() > ransac_params_->n)) // with fewer points, RANSAC will fail
        {

          std::vector<std::pair<double, double> > r, pairs;
          for (Size i = 0; i < obs_mz.size(); ++i)
          {
            pairs.push_back(std::make_pair(theo_mz[i], obs_mz[i]));
          }
          r = Math::RANSAC<Math::RansacModelQuadratic>().ransac(pairs, *ransac_params_);
          obs_mz.clear();
          theo_mz.clear();
          for (Size i = 0; i < r.size(); ++i)
          {
            theo_mz.push_back(r[i].first);
            obs_mz.push_back(r[i].second);
          }
        }
        // Quadratic fit
        Math::QuadraticRegression qr;
        qr.computeRegression(theo_mz.begin(), theo_mz.end(), obs_mz.begin());
        coeff_.push_back(qr.getA());
        coeff_.push_back(qr.getB());
        coeff_.push_back(qr.getC());
      }
      else if (md == QUADRATIC_WEIGHTED)
      {
        if (obs_mz.size() < 3) return false;

        // Quadratic fit (weighted)
        Math::QuadraticRegression qr;
        qr.computeRegressionWeighted(theo_mz.begin(), theo_mz.end(), obs_mz.begin(), weights.begin());
        coeff_.push_back(qr.getA());
        coeff_.push_back(qr.getB());
        coeff_.push_back(qr.getC());
      }

#ifdef DEBUG_TRAFOMODEL
      printf("# mz regression parameters: Y = %3.10f + %3.10f X + %3.10f X^2\n",
        coeff_[0],
        coeff_[1],
        coeff_[2]);

      // print results
      std::cout << "Calibration details:\n\n";
      std::cout << "m/z(theo) m/z(obs) ppm(before) | ppm(after)\n";
      std::vector<double> st_ppm_before, st_ppm_after;
      for (Size i = 0; i < obs_mz.size(); i++)
      {
        if (use_ppm_) st_ppm_before.push_back(obs_mz[i]);
        else st_ppm_before.push_back(Math::getPPM(theo_mz[i], obs_mz[i]));

        double obs_mz_v = obs_mz[i];
        if (use_ppm_) obs_mz_v = Math::ppmToMass(obs_mz_v, theo_mz[i]) + theo_mz[i]; //

        st_ppm_after.push_back(Math::getPPM(theo_mz[i], predict(obs_mz_v))); // predict() is ppm-aware itself

        printf("%4.5f  %4.5f  %2.1f | %2.1f\n", theo_mz[i], obs_mz_v, st_ppm_before.back(), st_ppm_after.back());
      }
      // use median and MAD to ignore outliers
      double m = Math::median(st_ppm_before.begin(), st_ppm_before.end());
      std::cout << "ppm before: median = " << m << "  MAD = " << Math::MAD(st_ppm_before.begin(), st_ppm_before.end(), m) << "\n";
      m = Math::median(st_ppm_after.begin(), st_ppm_after.end()); 
      std::cout << "ppm after : median = " << m << "  MAD = " << Math::MAD(st_ppm_after.begin(), st_ppm_after.end(), m) << "\n";
#endif

      return true;
    }
    catch (Exception::BaseException& /*e*/)
    {
      //LOG_ERROR << "Exception during model fitting: " << e.getMessage() << std::endl;
      return false;
    }
  }