int AbsoluteQuantitation::residualOutlierCandidate_( const std::vector<AbsoluteQuantitationStandards::featureConcentration>& component_concentrations, const String & feature_name, const String & transformation_model, const Param & transformation_model_params) { // Returns candidate outlier: A linear regression and residuals are calculated for // the data points. The one with highest residual error is selected as the outlier candidate. The // corresponding iterator position is then returned. // fit the model Param optimized_params = fitCalibration(component_concentrations, feature_name, transformation_model, transformation_model_params); // calculate the R2 and bias std::vector<double> biases; double correlation_coefficient = 0.0; calculateBiasAndR( component_concentrations, feature_name, transformation_model, optimized_params, biases, correlation_coefficient); return max_element(biases.begin(), biases.end()) - biases.begin(); }
int AbsoluteQuantitation::jackknifeOutlierCandidate_( const std::vector<AbsoluteQuantitationStandards::featureConcentration>& component_concentrations, const String & feature_name, const String & transformation_model, const Param & transformation_model_params) { // Returns candidate outlier: A linear regression and rsq is calculated for // the data points with one removed pair. The combination resulting in // highest rsq is considered corresponding to the outlier candidate. The // corresponding iterator position is then returned. std::vector<double> rsq_tmp; Param optimized_params = transformation_model_params; for (Size i = 0; i < component_concentrations.size(); i++) { std::vector<AbsoluteQuantitationStandards::featureConcentration> component_concentrations_tmp = component_concentrations; component_concentrations_tmp.erase(component_concentrations_tmp.begin() + i); // debugging: // std::cout << "jackknifeOutlierCandidate_: size of component_concentrations: " << std::to_string(component_concentrations_tmp.size()) << std::endl; // fit the model optimized_params = fitCalibration(component_concentrations_tmp, feature_name, transformation_model, optimized_params); // calculate the R2 and bias std::vector<double> biases; double correlation_coefficient = 0.0; calculateBiasAndR( component_concentrations_tmp, feature_name, transformation_model, optimized_params, biases, correlation_coefficient); rsq_tmp.push_back(correlation_coefficient); } return max_element(rsq_tmp.begin(), rsq_tmp.end()) - rsq_tmp.begin(); }
bool AbsoluteQuantitation::optimizeCalibrationCurveIterative( std::vector<AbsoluteQuantitationStandards::featureConcentration> & component_concentrations, const String & feature_name, const String & transformation_model, const Param & transformation_model_params, Param & optimized_params) { // sort from min to max concentration std::vector<AbsoluteQuantitationStandards::featureConcentration> component_concentrations_sorted = component_concentrations; std::sort(component_concentrations_sorted.begin(), component_concentrations_sorted.end(), [](AbsoluteQuantitationStandards::featureConcentration lhs, AbsoluteQuantitationStandards::featureConcentration rhs) { return lhs.actual_concentration < rhs.actual_concentration; //ascending order } ); // indices of component_concentrations std::vector<size_t> component_concentrations_sorted_indices;// loop from all points to min_points for (size_t index = 0; index < component_concentrations_sorted.size(); ++index) { component_concentrations_sorted_indices.push_back(index); } // starting parameters optimized_params = transformation_model_params; // for (size_t n_iters = 0; n_iters < max_iters_; ++n_iters) for (size_t n_iters = 0; n_iters < component_concentrations_sorted.size(); ++n_iters) { // extract out components const std::vector<AbsoluteQuantitationStandards::featureConcentration> component_concentrations_sub = extractComponents_( component_concentrations_sorted, component_concentrations_sorted_indices); // check if the min number of calibration points has been broken if (component_concentrations_sorted_indices.size() < min_points_) { LOG_INFO << "No optimal calibration found for " << component_concentrations_sub[0].feature.getMetaValue("native_id") << " ."; return false; //no optimal calibration found } // fit the model optimized_params = fitCalibration(component_concentrations_sub, feature_name, transformation_model, optimized_params); // calculate the R2 and bias std::vector<double> biases; // not needed (method parameters) double correlation_coefficient = 0.0; // not needed (method parameters) calculateBiasAndR( component_concentrations_sub, feature_name, transformation_model, optimized_params, biases, correlation_coefficient); // check R2 and biases bool bias_check = true; for (size_t bias_it = 0; bias_it < biases.size(); ++bias_it) { if (biases[bias_it] > max_bias_) { bias_check = false; } } if (bias_check && correlation_coefficient > min_correlation_coefficient_) { LOG_INFO << "Valid calibration found for " << component_concentrations_sub[0].feature.getMetaValue("native_id") << " ."; // copy over the final optimized points before exiting component_concentrations = component_concentrations_sub; return true; //optimal calibration found } // R2 and biases check failed, determine potential outlier int pos; if (outlier_detection_method_ == "iter_jackknife") { // get candidate outlier: removal of which datapoint results in best rsq? pos = jackknifeOutlierCandidate_( component_concentrations_sub, feature_name, transformation_model, optimized_params); } else if (outlier_detection_method_ == "iter_residual") { // get candidate outlier: removal of datapoint with largest residual? pos = residualOutlierCandidate_( component_concentrations_sub, feature_name, transformation_model, optimized_params); } else { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Method ") + outlier_detection_method_ + " is not a valid method for optimizeCalibrationCurveIterative"); } // remove if residual is an outlier according to Chauvenet's criterion // or if testing is turned off if (!use_chauvenet_ || MRMRTNormalizer::chauvenet(biases, pos)) { component_concentrations_sorted_indices.erase(component_concentrations_sorted_indices.begin() + pos); } else { return false; //no optimal calibration found } } return false; //no optimal calibration found }