// Smooths the intensities of a mass trace with a Savitzky-Golay filter.
//
// The peaks of mt are copied into a temporary spectrum, the spectrum is
// filtered with a polynomial of order 2, and the filtered intensities are
// handed to mt.setSmoothedIntensities().
//
// mt        mass trace to smooth
// win_size  requested frame length (in data points); values below 3 are
//           raised to 3
//
// Note: the reference paper uses lowess smoothing; Savitzky-Golay is used
// here because, looking at the unit test, it gives better fits.
void ElutionPeakDetection::smoothData(MassTrace& mt, int win_size) const
{
  // work on a copy of the peaks so the filter can operate on a spectrum
  MSSpectrum<PeakType> trace_as_spectrum;
  trace_as_spectrum.insert(trace_as_spectrum.begin(), mt.begin(), mt.end());

  SavitzkyGolayFilter sg_filter;
  Param sg_params;
  sg_params.setValue("polynomial_order", 2);
  // frame length must be at least polynomial_order+1, otherwise SG will fail
  sg_params.setValue("frame_length", std::max(3, win_size));
  sg_filter.setParameters(sg_params);
  sg_filter.filter(trace_as_spectrum);

  // collect the filtered intensities in trace order
  std::vector<double> filtered;
  for (MSSpectrum<PeakType>::iterator peak = trace_as_spectrum.begin();
       peak != trace_as_spectrum.end();
       ++peak)
  {
    filtered.push_back(peak->getIntensity());
  }

  mt.setSmoothedIntensities(filtered);
}
// Area-based signal-to-noise ratio of a mass trace.
//
// The noise area is the RMSE between raw and smoothed intensities (see
// computeMassTraceNoise) multiplied by the trace length; the signal area is
// tr.computePeakArea().
//
// Returns 0 for an empty trace and also when the noise area is not positive,
// mirroring computeApexSNR(), instead of dividing by zero and returning
// inf/NaN.
double ElutionPeakDetection::computeMassTraceSNR(const MassTrace& tr)
{
  double snr(0.0);

  if (tr.getSize() > 0)
  {
    const double noise_area = computeMassTraceNoise(tr) * tr.getTraceLength();
    const double signal_area = tr.computePeakArea();

    // guard the division: a zero (or NaN) noise area has no meaningful ratio
    if (noise_area > 0.0)
    {
      snr = signal_area / noise_area;
    }
  }

  return snr;
}
// Apex-based signal-to-noise ratio of a mass trace: the maximum intensity
// reported by tr.getMaxIntensity(true) divided by the noise level from
// computeMassTraceNoise().
//
// Returns 0 when the noise level is not strictly positive.
double ElutionPeakDetection::computeApexSNR(const MassTrace& tr)
{
  const double noise = computeMassTraceNoise(tr);

  // written as a negated '>' so that a NaN noise level also yields 0
  if (!(noise > 0.0))
  {
    return 0;
  }

  const double apex_intensity(tr.getMaxIntensity(true));
  return apex_intensity / noise;
}
double ElutionPeakDetection::computeMassTraceNoise(const MassTrace& tr) { // compute RMSE double squared_sum(0.0); std::vector<double> smooth_ints(tr.getSmoothedIntensities()); for (Size i = 0; i < smooth_ints.size(); ++i) { squared_sum += (tr[i].getIntensity() - smooth_ints[i]) * (tr[i].getIntensity() - smooth_ints[i]); } double rmse(0.0); if (!smooth_ints.empty()) { rmse = std::sqrt(squared_sum / smooth_ints.size()); } return rmse; }
void ElutionPeakDetection::detectElutionPeaks_(MassTrace& mt, std::vector<MassTrace>& single_mtraces) { //smooth data //std::vector<double> smoothed_data; // Size win_size = mt.getFWHMScansNum(); double scan_time(mt.getAverageMS1CycleTime()); Size win_size = std::ceil(chrom_fwhm_ / scan_time); // add smoothed data (original data is still accessible) smoothData(mt, static_cast<Int>(win_size)); // debug intensities // Size i = 0; // std::cout << "*****" << std::endl; // for (MassTrace::const_iterator mt_it = mt.begin(); mt_it != mt.end(); ++mt_it) // { // std::cout << mt_it->getIntensity() << " " << smoothed_data[i] << std::endl; // ++i; // } //std::cout << "*****" << std::endl; std::vector<Size> maxes, mins; findLocalExtrema(mt, win_size / 2, maxes, mins); // if only one maximum exists: finished! if (maxes.size() == 1) { bool pw_ok = true; bool snr_ok = true; // check mass trace filter criteria (if enabled) if (pw_filtering_ == "fixed") { double act_fwhm(mt.estimateFWHM(true)); // std::cout << "act_fwhm: " << act_fwhm << " "; if (act_fwhm < min_fwhm_ || act_fwhm > max_fwhm_) { pw_ok = false; } // std::cout << pw_ok << std::endl; } if (mt_snr_filtering_) { if (computeApexSNR(mt) < chrom_peak_snr_) { snr_ok = false; } } if (pw_ok && snr_ok) { mt.updateSmoothedMaxRT(); if (pw_filtering_ != "fixed") { mt.estimateFWHM(true); } // check for minimum/maximum trace length // double mt_length(std::fabs(mt.rbegin()->getRT() - mt.begin()->getRT())); // if ((mt_length >= min_trace_length_) && (mt_length <= max_trace_length_)) // if (mt_quality >= 1.2) // { #ifdef _OPENMP #pragma omp critical (OPENMS_ElutionPeakDetection_mtraces) #endif single_mtraces.push_back(mt); } } else if (maxes.empty()) { return; } else // split mt to sub-traces { MassTrace::const_iterator cp_it = mt.begin(); Size last_idx(0); // add last data point as last minimum (to grep the last chunk of the MT) mins.push_back(mt.getSize() - 1); for (Size min_idx = 0; min_idx < mins.size(); ++min_idx) { // copy 
sub-trace between cp_it and split point std::vector<PeakType> tmp_mt; std::vector<double> smoothed_tmp; while (last_idx <= mins[min_idx]) { tmp_mt.push_back(*cp_it); smoothed_tmp.push_back(mt.getSmoothedIntensities()[last_idx]); ++cp_it; ++last_idx; } // check if // if (tmp_mt.size() >= win_size / 2) // { MassTrace new_mt(tmp_mt); // copy smoothed int's new_mt.setSmoothedIntensities(smoothed_tmp); // check filter criteria bool pw_ok = true; bool snr_ok = true; // check mass trace filter criteria (if enabled) if (pw_filtering_ == "fixed") { double act_fwhm(new_mt.estimateFWHM(true)); // std::cout << "act_fwhm: " << act_fwhm << " "; if (act_fwhm < min_fwhm_ || act_fwhm > max_fwhm_) { pw_ok = false; } // std::cout << pw_ok << std::endl; } if (mt_snr_filtering_) { if (computeApexSNR(mt) < chrom_peak_snr_) { snr_ok = false; } } if (pw_ok && snr_ok) { // set label of sub-trace new_mt.setLabel(mt.getLabel() + "." + String(min_idx + 1)); //new_mt.updateWeightedMeanRT(); new_mt.updateSmoothedMaxRT(); //new_mt.updateSmoothedWeightedMeanRT(); new_mt.updateWeightedMeanMZ(); new_mt.updateWeightedMZsd(); if (pw_filtering_ != "fixed") { new_mt.estimateFWHM(true); } // double mt_quality(computeApexSNR(new_mt)); // double new_mt_length(std::fabs(new_mt.rbegin()->getRT() - new_mt.begin()->getRT())); // if ((new_mt_length >= min_trace_length_) && (new_mt_length <= max_trace_length_)) //{ #ifdef _OPENMP #pragma omp critical (OPENMS_ElutionPeakDetection_mtraces) #endif single_mtraces.push_back(new_mt); } // } } } return; }
void ElutionPeakDetection::findLocalExtrema(const MassTrace& tr, const Size& num_neighboring_peaks, std::vector<Size>& chrom_maxes, std::vector<Size>& chrom_mins) { std::vector<double> smoothed_ints_vec(tr.getSmoothedIntensities()); Size mt_length(smoothed_ints_vec.size()); if (mt_length != tr.getSize()) { throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__, "MassTrace was not smoothed before! Aborting...", String(smoothed_ints_vec.size())); } // first make sure that everything is cleared chrom_maxes.clear(); chrom_mins.clear(); // Extract RTs from the chromatogram and store them into into vectors for index access // std::cout << "neighboring peaks: " << num_neighboring_peaks << std::endl; // Store indices along with smoothed_ints to keep track of the peak order std::multimap<double, Size> intensity_indices; boost::dynamic_bitset<> used_idx(mt_length); for (Size i = 0; i < mt_length; ++i) { intensity_indices.insert(std::make_pair(smoothed_ints_vec[i], i)); } for (std::multimap<double, Size>::const_iterator c_it = intensity_indices.begin(); c_it != intensity_indices.end(); ++c_it) { double ref_int = c_it->first; Size ref_idx = c_it->second; if (!(used_idx[ref_idx]) && ref_int > 0.0) { bool real_max = true; // iterate up the RT Size start_idx(0); if (ref_idx > num_neighboring_peaks) { start_idx = ref_idx - num_neighboring_peaks; } Size end_idx = ref_idx + num_neighboring_peaks; if (end_idx > mt_length) { end_idx = mt_length; } for (Size j = start_idx; j < end_idx; ++j) { if (used_idx[j]) { real_max = false; break; } if (j == ref_idx) { continue; } if (smoothed_ints_vec[j] > ref_int) { real_max = false; } } if (real_max) { chrom_maxes.push_back(ref_idx); for (Size j = start_idx; j < end_idx; ++j) { used_idx[j] = true; } } } } std::sort(chrom_maxes.begin(), chrom_maxes.end()); if (chrom_maxes.size() > 1) { Size i(0), j(1); //for (Size i = 0; i < chrom_maxes.size() - 1; ++i) while (i < j && j < chrom_maxes.size()) { // bisection Size 
left_bound(chrom_maxes[i] + 1); Size right_bound(chrom_maxes[j] - 1); while ((left_bound + 1) < right_bound) { double mid_dist((right_bound - left_bound) / 2.0); Size mid_element_idx(left_bound + std::floor(mid_dist)); double mid_element_int = smoothed_ints_vec[mid_element_idx]; if (mid_element_int <= smoothed_ints_vec[mid_element_idx + 1]) { right_bound = mid_element_idx; } else // or to the right... { left_bound = mid_element_idx; } } Size min_rt((smoothed_ints_vec[left_bound] < smoothed_ints_vec[right_bound]) ? left_bound : right_bound); // check for valley depth between chromatographic peaks double min_int(1.0); if (smoothed_ints_vec[min_rt] > min_int) { min_int = smoothed_ints_vec[min_rt]; } double left_max_int(smoothed_ints_vec[chrom_maxes[i]]); double right_max_int(smoothed_ints_vec[chrom_maxes[j]]); double left_rt(tr[chrom_maxes[i]].getRT()); double mid_rt(tr[min_rt].getRT()); double right_rt(tr[chrom_maxes[j]].getRT()); double left_dist(std::fabs(mid_rt - left_rt)); double right_dist(std::fabs(right_rt - mid_rt)); double min_dist(min_fwhm_ / 2.0); // out debug info // std::cout << tr.getLabel() << ": i,j " << i << "," << j << ":" << left_max_int << " min: " << min_int << " " << right_max_int << " l " << left_rt << " r " << right_rt << " m " << mid_rt << std::endl; if (left_max_int / min_int >= 2.0 && right_max_int / min_int >= 2.0 && left_dist >= min_dist && right_dist >= min_dist) { chrom_mins.push_back(min_rt); // std::cout << "min added!" << std::endl; i = j; ++j; } else { // keep one of the chrom_maxes, iterate the other if (left_max_int > right_max_int) { ++j; } else { i = j; ++j; } } // chrom_mins.push_back(min_rt); } } return; }
void ElutionPeakDetection::detectElutionPeaks_(MassTrace & mt, std::vector<MassTrace> & single_mtraces) { std::vector<DoubleReal> rts, ints; for (MassTrace::const_iterator c_it = mt.begin(); c_it != mt.end(); ++c_it) { rts.push_back(c_it->getRT()); ints.push_back(c_it->getIntensity()); } std::vector<DoubleReal> smoothed_data; LowessSmoothing lowess_smooth; Param lowess_params; // use dynamically computed window sizes // Size win_size = mt.getFWHMScansNum(); // use one global window size for all mass traces to smooth DoubleReal scan_time(mt.getScanTime()); Size win_size = std::ceil(chrom_fwhm_ / scan_time); // std::cout << "win_size elution: " << scan_time << " " << win_size << std::endl; // if there is no previous FWHM estimation... do it now // if (win_size == 0) // { // mt.estimateFWHM(false); // estimate FWHM // win_size = mt.getFWHMScansNum(); // } lowess_params.setValue("window_size", win_size); lowess_smooth.setParameters(lowess_params); lowess_smooth.smoothData(rts, ints, smoothed_data); mt.setSmoothedIntensities(smoothed_data); // debug intensities // Size i = 0; // std::cout << "*****" << std::endl; // for (MassTrace::const_iterator mt_it = mt.begin(); mt_it != mt.end(); ++mt_it) // { // std::cout << mt_it->getIntensity() << " " << smoothed_data[i] << std::endl; // ++i; // } //std::cout << "*****" << std::endl; std::vector<Size> maxes, mins; // mt.findLocalExtrema(win_size / 2, maxes, mins); findLocalExtrema(mt, win_size/2, maxes, mins); // if only one maximum exists: finished! 
if (maxes.size() == 1) { bool pw_ok = true; bool snr_ok = true; // check mass trace filter criteria (if enabled) if (pw_filtering_ == "fixed") { DoubleReal act_fwhm(mt.estimateFWHM(true)); // std::cout << "act_fwhm: " << act_fwhm << " "; if (act_fwhm < min_fwhm_ || act_fwhm > max_fwhm_) { pw_ok = false; } // std::cout << pw_ok << std::endl; } if (mt_snr_filtering_) { if (computeApexSNR(mt) < chrom_peak_snr_) { snr_ok = false; } } if (pw_ok && snr_ok) { mt.updateSmoothedMaxRT(); if (pw_filtering_ != "fixed") { mt.estimateFWHM(true); } // check for minimum/maximum trace length // DoubleReal mt_length(std::fabs(mt.rbegin()->getRT() - mt.begin()->getRT())); // if ((mt_length >= min_trace_length_) && (mt_length <= max_trace_length_)) // if (mt_quality >= 1.2) // { #ifdef _OPENMP #pragma omp critical #endif single_mtraces.push_back(mt); } } else if (maxes.empty()) { return; } else // split mt to subtraces { MassTrace::const_iterator cp_it = mt.begin(); Size last_idx(0); for (Size min_idx = 0; min_idx < mins.size(); ++min_idx) { // copy subtrace between cp_it and splitpoint std::vector<PeakType> tmp_mt; std::vector<DoubleReal> smoothed_tmp; while (last_idx <= mins[min_idx]) { tmp_mt.push_back(*cp_it); smoothed_tmp.push_back(mt.getSmoothedIntensities()[last_idx]); ++cp_it; ++last_idx; } // check if // if (tmp_mt.size() >= win_size / 2) // { DoubleReal scantime(mt.getScanTime()); MassTrace new_mt(tmp_mt, scantime); // copy smoothed ints new_mt.setSmoothedIntensities(smoothed_tmp); // check filter criteria bool pw_ok = true; bool snr_ok = true; // check mass trace filter criteria (if enabled) if (pw_filtering_ == "fixed") { DoubleReal act_fwhm(new_mt.estimateFWHM(true)); // std::cout << "act_fwhm: " << act_fwhm << " "; if (act_fwhm < min_fwhm_ || act_fwhm > max_fwhm_) { pw_ok = false; } // std::cout << pw_ok << std::endl; } if (mt_snr_filtering_) { if (computeApexSNR(mt) < chrom_peak_snr_) { snr_ok = false; } } if (pw_ok && snr_ok) { // set label of subtrace String tr_num; 
std::stringstream read_in; read_in << (min_idx + 1); tr_num = "." + read_in.str(); new_mt.setLabel(mt.getLabel() + tr_num); //new_mt.updateWeightedMeanRT(); new_mt.updateSmoothedMaxRT(); //new_mt.updateSmoothedWeightedMeanRT(); new_mt.updateWeightedMeanMZ(); new_mt.updateWeightedMZsd(); if (pw_filtering_ != "fixed") { new_mt.estimateFWHM(true); } // DoubleReal mt_quality(computeApexSNR(new_mt)); // DoubleReal new_mt_length(std::fabs(new_mt.rbegin()->getRT() - new_mt.begin()->getRT())); // if ((new_mt_length >= min_trace_length_) && (new_mt_length <= max_trace_length_)) //{ #ifdef _OPENMP #pragma omp critical #endif single_mtraces.push_back(new_mt); } // } } // don't forget the trailing trace std::vector<PeakType> tmp_mt; std::vector<DoubleReal> smoothed_tmp; while (last_idx < mt.getSize()) { tmp_mt.push_back(*cp_it); smoothed_tmp.push_back(mt.getSmoothedIntensities()[last_idx]); ++cp_it; ++last_idx; } // if (tmp_mt.size() >= win_size / 2) // { DoubleReal scantime(mt.getScanTime()); MassTrace new_mt(tmp_mt, scantime); // copy smoothed ints new_mt.setSmoothedIntensities(smoothed_tmp); // check filter criteria bool pw_ok = true; bool snr_ok = true; // check mass trace filter criteria (if enabled) if (pw_filtering_ == "fixed") { DoubleReal act_fwhm(new_mt.estimateFWHM(true)); // std::cout << "act_fwhm: " << act_fwhm << " "; if (act_fwhm < min_fwhm_ || act_fwhm > max_fwhm_) { pw_ok = false; } // std::cout << pw_ok << std::endl; } if (mt_snr_filtering_) { if (computeApexSNR(mt) < chrom_peak_snr_) { snr_ok = false; } } if (pw_ok && snr_ok) { // set label of subtrace String tr_num; std::stringstream read_in; read_in << (mins.size() + 1); tr_num = "." 
+ read_in.str(); new_mt.setLabel(mt.getLabel() + tr_num); new_mt.updateSmoothedMaxRT(); new_mt.updateWeightedMeanMZ(); new_mt.updateWeightedMZsd(); if (pw_filtering_ != "fixed") { new_mt.estimateFWHM(true); } // DoubleReal mt_quality(computeApexSNR(new_mt)); // DoubleReal mt_length(std::fabs(new_mt.rbegin()->getRT() - new_mt.begin()->getRT())); // if ((mt_length >= min_trace_length_) && (mt_length <= max_trace_length_)) // { #ifdef _OPENMP #pragma omp critical #endif single_mtraces.push_back(new_mt); } // } } return; }