// Example no. 1
// 0
  /**
    @brief Finds local intensity maxima in profile data and appends one PeakCandidate per accepted peak.

    A data point i (2 <= i < size - 2) is accepted as a peak core when
    - neither direct neighbour has an intensity of (numerically) zero,
    - its intensity is strictly greater than that of both direct neighbours,
    - the point and both neighbours reach signal_to_noise_ (S/N values are only
      computed when signal_to_noise_ > 0, otherwise they are 0), and
    - if @p check_spacings is set, both neighbour distances are smaller than
      spacing_difference_ times the smaller of the two distances.

    A core whose second neighbours are more intense than its first neighbours on
    both sides (and pass the same S/N / spacing tests) is discarded. Every other
    core is extended to the left and to the right while the intensity does not
    increase, the previously visited point was not zero, the distance stays
    below spacing_difference_gap_ times the minimal core spacing and fewer than
    missing_ visited points failed the S/N or spacing_difference_ test.

    Only pos, left_boundary and right_boundary of a candidate are determined
    here; mz_max and int_max are set to -1.

    @param mz_array positions of the data points; fewer than 5 points yield no candidates
    @param int_array intensities, indexed in parallel to @p mz_array; if it is
           shorter than @p mz_array no candidates are produced
    @param pc candidates are appended to this vector, existing entries are kept
    @param check_spacings enables the spacing tests described above
  */
  void PeakPickerMaxima::findMaxima(const std::vector<double>& mz_array,
      const std::vector<double>& int_array, std::vector<PeakCandidate>& pc,
      bool check_spacings)
  {
    // don't pick a spectrum with less than 5 data points
    if (mz_array.size() < 5) return;

    // every index used below is applied to both arrays; refuse input that
    // would make us read past the end of the intensity array
    if (int_array.size() < mz_array.size()) return;

    // signal-to-noise estimation (only when an S/N threshold is configured)
    SignalToNoiseEstimatorMedianRapid::NoiseEstimator noise_estimator(0, 0, 0);
    if (signal_to_noise_ > 0.0)
    {
      SignalToNoiseEstimatorMedianRapid rapid_sne(sn_window_length_);
      noise_estimator = rapid_sne.estimateNoise(mz_array, int_array);
    }

    // find local maxima in profile data; the loop bounds guarantee that
    // i - 2 and i + 2 are valid indices inside the loop body
    for (size_t i = 2; i < mz_array.size() - 2; ++i)
    {
      double central_peak_mz = mz_array[i], central_peak_int = int_array[i];
      double left_neighbor_mz = mz_array[i - 1], left_neighbor_int = int_array[i - 1];
      double right_neighbor_mz = mz_array[i + 1], right_neighbor_int = int_array[i + 1];

      // do not interpolate when the left or right support is a zero-data-point
      if (std::fabs(left_neighbor_int) < std::numeric_limits<double>::epsilon()) continue;
      if (std::fabs(right_neighbor_int) < std::numeric_limits<double>::epsilon()) continue;

      // MZ spacing sanity checks
      double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
      double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
      double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;

      // S/N of the core points; stays 0 (and trivially passes) when S/N filtering is off
      double act_snt = 0.0, act_snt_l1 = 0.0, act_snt_r1 = 0.0;
      if (signal_to_noise_ > 0.0)
      {
        act_snt = central_peak_int / noise_estimator.get_noise_value(central_peak_mz);
        act_snt_l1 = left_neighbor_int / noise_estimator.get_noise_value(left_neighbor_mz);
        act_snt_r1 = right_neighbor_int / noise_estimator.get_noise_value(right_neighbor_mz);
      }

      // look for peak cores meeting MZ and intensity/SNT criteria
      if ((central_peak_int > left_neighbor_int) &&
          (central_peak_int > right_neighbor_int) &&
          (act_snt >= signal_to_noise_) &&
          (act_snt_l1 >= signal_to_noise_) &&
          (act_snt_r1 >= signal_to_noise_) &&
          (!check_spacings ||
           ((left_to_central < spacing_difference_ * min_spacing) &&
            (central_to_right < spacing_difference_ * min_spacing))))
      {
        // special case: if a peak core is surrounded by more intense
        // satellite peaks (indicates oscillation rather than
        // real peaks) -> remove

        double act_snt_l2 = 0.0, act_snt_r2 = 0.0;
        PeakCandidate candidate;
        candidate.pos = i;
        candidate.mz_max = -1;  // not determined in this function
        candidate.int_max = -1; // not determined in this function

        if (signal_to_noise_ > 0.0)
        {
          act_snt_l2 = int_array[i - 2] / noise_estimator.get_noise_value(mz_array[i - 2]);
          act_snt_r2 = int_array[i + 2] / noise_estimator.get_noise_value(mz_array[i + 2]);
        }

        if (left_neighbor_int < int_array[i - 2]
            && right_neighbor_int < int_array[i + 2]
            && act_snt_l2 >= signal_to_noise_
            && act_snt_r2 >= signal_to_noise_
            && (!check_spacings || std::fabs(left_neighbor_mz - mz_array[i - 2]) < spacing_difference_ * min_spacing)
            && (!check_spacings || std::fabs(mz_array[i + 2] - right_neighbor_mz) < spacing_difference_ * min_spacing)
            )
        {
          // together with the loop increment this also skips the right neighbour
          ++i;
          continue;
        }

        double boundary_mz, boundary_int;

        // peak core found, now extend it
        // to the left
        size_t k = 2;

        bool previous_zero_left(false);    // no need to extend peak if previous intensity was zero
        bool previous_zero_right(false);   // no need to extend peak if previous intensity was zero
        size_t missing_left(0);
        size_t missing_right(0);

        boundary_mz = left_neighbor_mz;
        boundary_int = left_neighbor_int;

        while (k <= i // prevent underflow of i - k
              && !previous_zero_left
              && (missing_left < missing_)
              && int_array[i - k] <= boundary_int
              && (!check_spacings || (std::fabs(mz_array[i - k] - boundary_mz) < spacing_difference_gap_ * min_spacing))
              )
        {
          // Obtain S/N value (only if parameter is turned on)
          double act_snt_lk = 0.0;
          if (signal_to_noise_ > 0.0)
          {
            act_snt_lk = int_array[i - k] / noise_estimator.get_noise_value(mz_array[i - k]);
          }

          // a point failing the S/N or the (stricter) spacing test counts as missing;
          // the test must use the boundary of the previous step, so evaluate it first
          const bool point_ok = act_snt_lk >= signal_to_noise_
              && (!check_spacings || (std::fabs(mz_array[i - k] - boundary_mz) < spacing_difference_ * min_spacing));
          if (!point_ok) ++missing_left;

          // the visited point becomes the new boundary in either case
          boundary_mz = mz_array[i - k];
          boundary_int = int_array[i - k];

          previous_zero_left = (std::fabs(int_array[i - k]) < std::numeric_limits<double>::epsilon());
          ++k;
        }
        // index of the last point visited on the left (i - 1 if none was visited)
        candidate.left_boundary = i - k + 1;

        // If the quota of missing points is used up the boundary is shifted by
        // one. The shift is skipped at index 0: decrementing there would move
        // the boundary in front of the first data point.
        // NOTE(review): the decrement moves the left boundary away from the
        // apex, whereas the right-hand side below moves towards the apex -
        // confirm that this asymmetry is intended.
        if (missing_left >= missing_ && candidate.left_boundary > 0) candidate.left_boundary--;

        // to the right
        k = 2;
        boundary_mz = right_neighbor_mz;
        boundary_int = right_neighbor_int;
        while ((i + k) < mz_array.size() // prevent overflow
              && !previous_zero_right
              && (missing_right < missing_)
              && int_array[i + k] <= boundary_int
              && (!check_spacings || (std::fabs(mz_array[i + k] - boundary_mz) < spacing_difference_gap_ * min_spacing))
              )
        {
          // Obtain S/N value (only if parameter is turned on)
          double act_snt_rk = 0.0;
          if (signal_to_noise_ > 0.0)
          {
            act_snt_rk = int_array[i + k] / noise_estimator.get_noise_value(mz_array[i + k]);
          }

          // same bookkeeping as on the left-hand side
          const bool point_ok = act_snt_rk >= signal_to_noise_
              && (!check_spacings || (std::fabs(mz_array[i + k] - boundary_mz) < spacing_difference_ * min_spacing));
          if (!point_ok) ++missing_right;

          boundary_mz = mz_array[i + k];
          boundary_int = int_array[i + k];

          previous_zero_right = (std::fabs(int_array[i + k]) < std::numeric_limits<double>::epsilon());
          ++k;
        }
        // index of the last point visited on the right (i + 1 if none was visited)
        candidate.right_boundary = i + k - 1;

        // If we walked one too far, lets backtrack
        if (missing_right >= missing_) candidate.right_boundary--;

        // jump over raw data points that have been considered already
        // (k still holds the extent of the right-hand extension)
        i = i + k - 1;
        pc.push_back(candidate);
      }
    }
  }
// Example no. 2
// 0
  void PeakPickerHiRes::pick(const MSSpectrum& input, MSSpectrum& output, std::vector<PeakBoundary>& boundaries, bool check_spacings) const
  {
    // copy meta data of the input spectrum
    output.clear(true);
    output.SpectrumSettings::operator=(input);
    output.MetaInfoInterface::operator=(input);
    output.setRT(input.getRT());
    output.setMSLevel(input.getMSLevel());
    output.setName(input.getName());
    output.setType(SpectrumSettings::CENTROID);
    if (report_FWHM_)
    {
      output.getFloatDataArrays().resize(1);
      output.getFloatDataArrays()[0].setName( report_FWHM_as_ppm_ ? "FWHM_ppm" : "FWHM");
    }

    // don't pick a spectrum with less than 5 data points
    if (input.size() < 5) return;

    // if both spacing constraints are disabled, don't check spacings at all:
    if ((spacing_difference_ == std::numeric_limits<double>::infinity()) &&
      (spacing_difference_gap_ == std::numeric_limits<double>::infinity()))
    {
      check_spacings = false;
    }

    // signal-to-noise estimation
    SignalToNoiseEstimatorMedian<MSSpectrum > snt;
    snt.setParameters(param_.copy("SignalToNoise:", true));

    if (signal_to_noise_ > 0.0)
    {
      snt.init(input);
    }

    // find local maxima in profile data
    for (Size i = 2; i < input.size() - 2; ++i)
    {
      double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
      double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
      double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();

      // do not interpolate when the left or right support is a zero-data-point
      if (std::fabs(left_neighbor_int) < std::numeric_limits<double>::epsilon()) continue;
      if (std::fabs(right_neighbor_int) < std::numeric_limits<double>::epsilon()) continue;

      // MZ spacing sanity checks
      double left_to_central = 0.0, central_to_right = 0.0, min_spacing = 0.0;
      if (check_spacings)
      {
        left_to_central = central_peak_mz - left_neighbor_mz;
        central_to_right = right_neighbor_mz - central_peak_mz;
        min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
      }

      double act_snt = 0.0, act_snt_l1 = 0.0, act_snt_r1 = 0.0;
      if (signal_to_noise_ > 0.0)
      {
        act_snt = snt.getSignalToNoise(input[i]);
        act_snt_l1 = snt.getSignalToNoise(input[i - 1]);
        act_snt_r1 = snt.getSignalToNoise(input[i + 1]);
      }

      // look for peak cores meeting MZ and intensity/SNT criteria
      if ((central_peak_int > left_neighbor_int) && 
        (central_peak_int > right_neighbor_int) && 
        (act_snt >= signal_to_noise_) && 
        (act_snt_l1 >= signal_to_noise_) && 
        (act_snt_r1 >= signal_to_noise_) &&
        (!check_spacings || 
        ((left_to_central < spacing_difference_ * min_spacing) && 
          (central_to_right < spacing_difference_ * min_spacing))))
      {
        // special case: if a peak core is surrounded by more intense
        // satellite peaks (indicates oscillation rather than
        // real peaks) -> remove

        double act_snt_l2 = 0.0, act_snt_r2 = 0.0;

        if (signal_to_noise_ > 0.0)
        {
          act_snt_l2 = snt.getSignalToNoise(input[i - 2]);
          act_snt_r2 = snt.getSignalToNoise(input[i + 2]);
        }

        // checking signal-to-noise?
        if ((i > 1) &&
          (i + 2 < input.size()) &&
          (left_neighbor_int < input[i - 2].getIntensity()) &&
          (right_neighbor_int < input[i + 2].getIntensity()) &&
          (act_snt_l2 >= signal_to_noise_) &&
          (act_snt_r2 >= signal_to_noise_) &&
          (!check_spacings ||
          ((left_neighbor_mz - input[i - 2].getMZ() < spacing_difference_ * min_spacing) && 
            (input[i + 2].getMZ() - right_neighbor_mz < spacing_difference_ * min_spacing))))
        {
          ++i;
          continue;
        }

        std::map<double, double> peak_raw_data;

        peak_raw_data[central_peak_mz] = central_peak_int;
        peak_raw_data[left_neighbor_mz] = left_neighbor_int;
        peak_raw_data[right_neighbor_mz] = right_neighbor_int;

        // peak core found, now extend it
        // to the left
        Size k = 2;

        bool previous_zero_left(false); // no need to extend peak if previous intensity was zero
        Size missing_left(0);
        Size left_boundary(i - 1); // index of the left boundary for the spline interpolation

        while ((k <= i) && // prevent underflow
          (i - k + 1 > 0) && 
          !previous_zero_left && 
          (missing_left <= missing_) && 
          (input[i - k].getIntensity() <= peak_raw_data.begin()->second) &&
          (!check_spacings || 
          (peak_raw_data.begin()->first - input[i - k].getMZ() < spacing_difference_gap_ * min_spacing)))
        {
          double act_snt_lk = 0.0;

          if (signal_to_noise_ > 0.0)
          {
            act_snt_lk = snt.getSignalToNoise(input[i - k]);
          }

          if ((act_snt_lk >= signal_to_noise_) && 
            (!check_spacings ||
            (peak_raw_data.begin()->first - input[i - k].getMZ() < spacing_difference_ * min_spacing)))
          {
            peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
          }
          else
          {
            ++missing_left;
            if (missing_left <= missing_)
            {
              peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
            }
          }

          previous_zero_left = (input[i - k].getIntensity() == 0);
          left_boundary = i - k;
          ++k;
        }

        // to the right
        k = 2;

        bool previous_zero_right(false); // no need to extend peak if previous intensity was zero
        Size missing_right(0);
        Size right_boundary(i+1); // index of the right boundary for the spline interpolation

        while ((i + k < input.size()) && 
          !previous_zero_right && 
          (missing_right <= missing_) && 
          (input[i + k].getIntensity() <= peak_raw_data.rbegin()->second) &&
          (!check_spacings ||
          (input[i + k].getMZ() - peak_raw_data.rbegin()->first < spacing_difference_gap_ * min_spacing)))
        {
          double act_snt_rk = 0.0;

          if (signal_to_noise_ > 0.0)
          {
            act_snt_rk = snt.getSignalToNoise(input[i + k]);
          }

          if ((act_snt_rk >= signal_to_noise_) && 
            (!check_spacings ||
            (input[i + k].getMZ() - peak_raw_data.rbegin()->first < spacing_difference_ * min_spacing)))
          {
            peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
          }
          else
          {
            ++missing_right;
            if (missing_right <= missing_)
            {
              peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
            }
          }

          previous_zero_right = (input[i + k].getIntensity() == 0);
          right_boundary = i + k;
          ++k;
        }

        // skip if the minimal number of 3 points for fitting is not reached
        if (peak_raw_data.size() < 3) continue;

        CubicSpline2d peak_spline (peak_raw_data);

        // calculate maximum by evaluating the spline's 1st derivative
        // (bisection method)
        double max_peak_mz = central_peak_mz;
        double max_peak_int = central_peak_int;
        double threshold = 1e-6;
        OpenMS::Math::spline_bisection(peak_spline, left_neighbor_mz, right_neighbor_mz, max_peak_mz, max_peak_int, threshold);

        //
        // compute FWHM
        //
        if (report_FWHM_)
        {
          double fwhm_int = max_peak_int / 2.0;
          threshold = 0.01 * fwhm_int;
          double mz_mid, int_mid; 
          // left:
          double mz_left = peak_raw_data.begin()->first;
          double mz_center = max_peak_mz;
          if (peak_spline.eval(mz_left) > fwhm_int)
          { // the spline ends before half max is reached -- take the leftmost point (probably an underestimation)
            mz_mid = mz_left;
          }
          else
          {
            do 
            {
              mz_mid = mz_left / 2 + mz_center / 2;
              int_mid = peak_spline.eval(mz_mid);
              if (int_mid < fwhm_int)
              {
                mz_left = mz_mid;
              }
              else
              {
                mz_center = mz_mid;
              }
            } while (fabs(int_mid - fwhm_int) > threshold);
          }
          const double fwhm_left_mz = mz_mid;

          // right ...
          double mz_right = peak_raw_data.rbegin()->first;
          mz_center = max_peak_mz;
          if (peak_spline.eval(mz_right) > fwhm_int)
          { // the spline ends before half max is reached -- take the rightmost point (probably an underestimation)
            mz_mid = mz_right;
          }
          else
          {
            do 
            {
              mz_mid = mz_right / 2 + mz_center / 2;
              int_mid = peak_spline.eval(mz_mid);
              if (int_mid < fwhm_int)
              {
                mz_right = mz_mid;
              }
              else
              {
                mz_center = mz_mid;
              }

            } while (fabs(int_mid - fwhm_int) > threshold);
          }
          const double fwhm_right_mz = mz_mid;
          const double fwhm_absolute = fwhm_right_mz - fwhm_left_mz;
          output.getFloatDataArrays()[0].push_back( report_FWHM_as_ppm_ ? fwhm_absolute / max_peak_mz  * 1e6 : fwhm_absolute);
        } // FWHM

          // save picked peak into output spectrum
        Peak1D peak;
        PeakBoundary peak_boundary;
        peak.setMZ(max_peak_mz);
        peak.setIntensity(max_peak_int);
        peak_boundary.mz_min = input[left_boundary].getMZ();
        peak_boundary.mz_max = input[right_boundary].getMZ();
        output.push_back(peak);

        boundaries.push_back(peak_boundary);

        // jump over profile data points that have been considered already
        i = i + k - 1;
      }
    }

    return;
  }