void AbsoluteQuantitation::calculateBiasAndR( const std::vector<AbsoluteQuantitationStandards::featureConcentration> & component_concentrations, const String & feature_name, const String & transformation_model, const Param & transformation_model_params, std::vector<double> & biases, double & correlation_coefficient) { // reset biases biases.clear(); // extract out the calibration points std::vector<double> concentration_ratios, feature_amounts_ratios; TransformationModel::DataPoints data; TransformationModel::DataPoint point; for (size_t i = 0; i < component_concentrations.size(); ++i) { // calculate the actual and calculated concentration ratios double calculated_concentration_ratio = applyCalibration(component_concentrations[i].feature, component_concentrations[i].IS_feature, feature_name, transformation_model, transformation_model_params); double actual_concentration_ratio = component_concentrations[i].actual_concentration/ component_concentrations[i].IS_actual_concentration; concentration_ratios.push_back(component_concentrations[i].actual_concentration); // extract out the feature amount ratios double feature_amount_ratio = calculateRatio(component_concentrations[i].feature, component_concentrations[i].IS_feature, feature_name)/component_concentrations[i].dilution_factor; feature_amounts_ratios.push_back(feature_amount_ratio); // calculate the bias double bias = calculateBias(actual_concentration_ratio, calculated_concentration_ratio); biases.push_back(bias); point.first = actual_concentration_ratio; point.second = feature_amount_ratio; data.push_back(point); } // apply weighting to the feature amounts and actual concentration ratios TransformationModel tm(data, transformation_model_params); tm.weightData(data); std::vector<double> concentration_ratios_weighted, feature_amounts_ratios_weighted; for (size_t i = 0; i < data.size(); ++i) { concentration_ratios_weighted.push_back(data[i].first); feature_amounts_ratios_weighted.push_back(data[i].second); } // 
calculate the R2 (R2 = Pearson_R^2) correlation_coefficient = Math::pearsonCorrelationCoefficient( concentration_ratios_weighted.begin(), concentration_ratios_weighted.begin() + concentration_ratios_weighted.size(), feature_amounts_ratios_weighted.begin(), feature_amounts_ratios_weighted.begin() + feature_amounts_ratios_weighted.size() ); }
// Builds a linear model (y = slope * x + intercept).
//
// If no data points are given and both "slope" and "intercept" exist in
// params, those values are used directly. Otherwise the parameters are
// estimated from data:
//   - 0 points: throws Exception::IllegalArgument
//   - 1 point : slope is fixed to 1, intercept is the offset of that point
//   - otherwise: least-squares fit via gsl_fit_linear; with
//     "symmetric_regression" == "true" the fit is done on (y - x) vs. (y + x)
//     and transformed back afterwards
//
// data    pairs of (x = first, y = second)
// params  model parameters; missing entries are filled from
//         getDefaultParameters() when the model is estimated from data
TransformationModelLinear::TransformationModelLinear(
  const TransformationModel::DataPoints & data, const Param & params)
{
  params_ = params;
  data_given_ = !data.empty();
  if (!data_given_ && params.exists("slope") && (params.exists("intercept")))
  {
    // don't estimate parameters, use given values
    slope_ = params.getValue("slope");
    intercept_ = params.getValue("intercept");
  }
  else // estimate parameters from data
  {
    Param defaults;
    getDefaultParameters(defaults);
    params_.setDefaults(defaults);
    symmetric_ = params_.getValue("symmetric_regression") == "true";

    size_t size = data.size();
    if (size == 0) // no data
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                       "no data points for 'linear' model");
    }
    else if (size == 1) // degenerate case, but we can still do something
    {
      slope_ = 1.0;
      intercept_ = data[0].second - data[0].first;
    }
    else // compute least-squares fit
    {
      vector<double> x(size), y(size);
      for (size_t i = 0; i < size; ++i)
      {
        if (symmetric_)
        {
          // regress (y - x) on (y + x) so that both dimensions are treated alike
          x[i] = data[i].second + data[i].first;
          y[i] = data[i].second - data[i].first;
        }
        else
        {
          x[i] = data[i].first;
          y[i] = data[i].second;
        }
      }
      double cov00, cov01, cov11, sumsq; // covariance values, sum of squares
      double * x_start = &(x[0]), * y_start = &(y[0]);
      // fits y = c0 + c1 * x with c0 -> intercept_, c1 -> slope_
      gsl_fit_linear(x_start, 1, y_start, 1, size, &intercept_, &slope_,
                     &cov00, &cov01, &cov11, &sumsq);

      if (symmetric_) // undo coordinate transformation:
      {
        // The fit was (y - x) = c0 + c1 * (y + x). Solving for y gives
        //   y = c0 / (1 - c1) + x * (1 + c1) / (1 - c1),
        // so both coefficients are divided by (1 - c1). The fitted slope must
        // be saved first because slope_ is overwritten.
        // NOTE(review): c1 == 1 (all x identical) still yields a division by
        // zero, as it did before for the slope.
        const double fitted_slope = slope_;
        slope_ = (1.0 + fitted_slope) / (1.0 - fitted_slope);
        intercept_ = intercept_ / (1.0 - fitted_slope);
      }
    }
  }
}
// Creates a linear model (slope/intercept).
//
// When the data set is empty and params already contains both "slope" and
// "intercept", those values are taken as they are. In every other case the
// defaults are merged into the stored parameters and the model is estimated:
// an empty data set raises Exception::IllegalArgument, a single point gives
// slope 1 with the point's offset as intercept, and two or more points are
// fitted with Wm5::HeightLineFit2 (Exception::UnableToFit if that fails).
// After estimation the resulting slope and intercept are written back into
// the stored parameters.
TransformationModelLinear::TransformationModelLinear(const TransformationModel::DataPoints& data, const Param& params)
{
  params_ = params;
  data_given_ = !data.empty();

  const bool take_given_values = !data_given_ && params.exists("slope") && params.exists("intercept");
  if (take_given_values)
  {
    // nothing to estimate
    slope_ = params.getValue("slope");
    intercept_ = params.getValue("intercept");
    return;
  }

  // estimation path: complete the parameter set first
  Param defaults;
  getDefaultParameters(defaults);
  params_.setDefaults(defaults);
  symmetric_ = params_.getValue("symmetric_regression") == "true";

  const size_t n_points = data.size();
  if (n_points == 0)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                     "no data points for 'linear' model");
  }

  if (n_points == 1)
  {
    // a single point only determines the offset
    slope_ = 1.0;
    intercept_ = data[0].second - data[0].first;
  }
  else
  {
    // least-squares line through all points
    std::vector<Wm5::Vector2d> fit_input;
    for (size_t k = 0; k < n_points; ++k)
    {
      fit_input.push_back(Wm5::Vector2d(data[k].first, data[k].second));
    }
    const bool fit_ok = Wm5::HeightLineFit2<double>(static_cast<int>(n_points), &fit_input.front(), slope_, intercept_);
    if (!fit_ok)
    {
      throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                   "TransformationModelLinear",
                                   "Unable to fit linear transformation to data points.");
    }
  }

  // publish the estimated coefficients through the parameter object
  params_.setValue("slope", slope_);
  params_.setValue("intercept", intercept_);
}
// Creates a lowess model: the input points are smoothed with
// FastLowessSmoothing::lowess and the smoothed points are handed to a newly
// allocated TransformationModelInterpolated, which is stored in model_.
//
// data_   input points (first = x, second = y); at least two are required,
//         otherwise Exception::IllegalArgument is thrown
// params  model parameters; missing entries are filled from
//         getDefaultParameters(). Read here: "span", "num_iterations",
//         "delta", "interpolation_type", "extrapolation_type".
TransformationModelLowess::TransformationModelLowess(
  const TransformationModel::DataPoints& data_, const Param& params) :
  model_(0)
{
  // merge the defaults into the stored parameters
  params_ = params;
  Param defaults;
  getDefaultParameters(defaults);
  params_.setDefaults(defaults);

  if (data_.size() < 2)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                     "'lowess' model requires more data");
  }

  // work on a sorted copy so the caller's data stays untouched
  TransformationModel::DataPoints sorted_points(data_);
  std::sort(sorted_points.begin(), sorted_points.end(), cmpFirstDimension);

  // split into coordinate vectors and track the x range on the way
  const Size n_points = sorted_points.size();
  vector<double> x(n_points), y(n_points), smoothed(n_points);
  double x_lo = sorted_points[0].first;
  double x_hi = x_lo;
  for (Size k = 0; k < n_points; ++k)
  {
    x[k] = sorted_points[k].first;
    y[k] = sorted_points[k].second;
    if (x[k] < x_lo)
    {
      x_lo = x[k];
    }
    else if (x[k] > x_hi)
    {
      x_hi = x[k];
    }
  }

  double span = params_.getValue("span");
  int nsteps = params_.getValue("num_iterations");
  double delta = params_.getValue("delta");
  if (delta < 0.0)
  {
    // a negative delta requests automatic selection: 1% of the x range
    delta = (x_hi - x_lo) * 0.01;
  }

  FastLowessSmoothing::lowess(x, y, span, nsteps, delta, smoothed);

  // pair every x with its smoothed y
  TransformationModel::DataPoints lowess_points;
  for (Size k = 0; k < smoothed.size(); ++k)
  {
    lowess_points.push_back(std::make_pair(x[k], smoothed[k]));
  }

  // TODO thin out data here ? a few points may be enough for the interpolation
  // the interpolation/extrapolation settings are passed through from our own parameters
  Param interpolation_params;
  TransformationModelInterpolated::getDefaultParameters(interpolation_params);
  interpolation_params.setValue("interpolation_type", params_.getValue("interpolation_type"));
  interpolation_params.setValue("extrapolation_type", params_.getValue("extrapolation_type"));

  // interpolation model on top of the lowess-smoothed points
  model_ = new TransformationModelInterpolated(lowess_points, interpolation_params);
}
// Creates a cubic B-spline model from the given points and fits it.
//
// data    input points (first = x, second = y); fewer than four points raise
//         Exception::IllegalArgument
// params  model parameters; missing entries are filled from
//         getDefaultParameters(). Read here: "num_breakpoints" (clamped to
//         [2, number of points - 2] with a warning) and "break_positions"
//         ("uniform" or "quantiles", anything else raises
//         Exception::IllegalArgument).
//
// The GSL vectors x_, y_, w_ and the spline workspace_ are allocated here;
// they are not released in this function.
TransformationModelBSpline::TransformationModelBSpline(
  const TransformationModel::DataPoints & data, const Param & params)
{
  params_ = params;
  Param defaults;
  getDefaultParameters(defaults);
  params_.setDefaults(defaults);

  // --- validation (nothing is allocated before this point) ---
  if (data.size() < 4) // TODO: check number
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                     "'b_spline' model needs at least four data points");
  }
  Size num_breakpoints = params_.getValue("num_breakpoints");
  String break_positions = params_.getValue("break_positions");
  const bool uniform_breaks = (break_positions == "uniform");
  if (!uniform_breaks && (break_positions != "quantiles"))
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                     "parameter 'break_positions' for 'b_spline' model must be 'uniform' or 'quantiles'");
  }

  // --- copy the points into GSL vectors ---
  size_ = data.size();
  x_ = gsl_vector_alloc(size_);
  y_ = gsl_vector_alloc(size_);
  w_ = gsl_vector_alloc(size_);
  for (size_t k = 0; k < size_; ++k)
  {
    gsl_vector_set(x_, k, data[k].first);
    gsl_vector_set(y_, k, data[k].second);
    gsl_vector_set(w_, k, 1.0); // TODO: non-uniform weights
  }
  gsl_vector_minmax(x_, &xmin_, &xmax_);

  // --- bring the number of breakpoints into the supported range ---
  if (num_breakpoints < 2)
  {
    num_breakpoints = 2;
    LOG_WARN << "Warning: Increased parameter 'num_breakpoints' to 2 (minimum)." << endl;
  }
  else if (num_breakpoints > size_ - 2)
  {
    num_breakpoints = size_ - 2;
    LOG_WARN << "Warning: Decreased parameter 'num_breakpoints' to " + String(num_breakpoints) + " (maximum for this number of data points)." << endl;
  }

  // --- cubic (k = 4) spline workspace and knot placement ---
  workspace_ = gsl_bspline_alloc(4, num_breakpoints);
  if (uniform_breaks)
  {
    gsl_bspline_knots_uniform(xmin_, xmax_, workspace_);
  }
  else
  {
    // evenly spaced quantile levels from 0 to 1; the last entry keeps its
    // initial value of exactly 1.0
    vector<double> quantiles(num_breakpoints, 1.0);
    const double step = 1.0 / (num_breakpoints - 1);
    for (Size q = 0; q + 1 < num_breakpoints; ++q)
    {
      quantiles[q] = q * step;
    }
    gsl_vector * breakpoints = gsl_vector_alloc(num_breakpoints);
    getQuantiles_(x_, quantiles, breakpoints);
    gsl_bspline_knots(breakpoints, workspace_);
    gsl_vector_free(breakpoints); // only needed while the knots are set up
  }

  ncoeffs_ = gsl_bspline_ncoeffs(workspace_);
  // from here on xmin_/xmax_ describe the knot range rather than the data range
  gsl_vector_minmax(workspace_->knots, &xmin_, &xmax_);
  computeFit_();
}