Example #1
0
  void AbsoluteQuantitation::calculateBiasAndR(
    const std::vector<AbsoluteQuantitationStandards::featureConcentration> & component_concentrations,
    const String & feature_name,
    const String & transformation_model,
    const Param & transformation_model_params,
    std::vector<double> & biases,
    double & correlation_coefficient)
  {
    // reset biases
    biases.clear();

    // extract out the calibration points
    std::vector<double> concentration_ratios, feature_amounts_ratios;
    TransformationModel::DataPoints data;
    TransformationModel::DataPoint point;
    for (size_t i = 0; i < component_concentrations.size(); ++i)
    {

      // calculate the actual and calculated concentration ratios
      double calculated_concentration_ratio = applyCalibration(component_concentrations[i].feature,
        component_concentrations[i].IS_feature,
        feature_name,
        transformation_model,
        transformation_model_params);

      double actual_concentration_ratio = component_concentrations[i].actual_concentration/
        component_concentrations[i].IS_actual_concentration;
      concentration_ratios.push_back(component_concentrations[i].actual_concentration);

      // extract out the feature amount ratios
      double feature_amount_ratio = calculateRatio(component_concentrations[i].feature,
        component_concentrations[i].IS_feature,
        feature_name)/component_concentrations[i].dilution_factor;
      feature_amounts_ratios.push_back(feature_amount_ratio);

      // calculate the bias
      double bias = calculateBias(actual_concentration_ratio, calculated_concentration_ratio);
      biases.push_back(bias);

      point.first = actual_concentration_ratio;
      point.second = feature_amount_ratio;
      data.push_back(point);
    }

    // apply weighting to the feature amounts and actual concentration ratios
    TransformationModel tm(data, transformation_model_params);
    tm.weightData(data);
    std::vector<double> concentration_ratios_weighted, feature_amounts_ratios_weighted;
    for (size_t i = 0; i < data.size(); ++i)
    {
      concentration_ratios_weighted.push_back(data[i].first);
      feature_amounts_ratios_weighted.push_back(data[i].second);
    }

    // calculate the R2 (R2 = Pearson_R^2)
    correlation_coefficient = Math::pearsonCorrelationCoefficient(
      concentration_ratios_weighted.begin(), concentration_ratios_weighted.begin() + concentration_ratios_weighted.size(),
      feature_amounts_ratios_weighted.begin(), feature_amounts_ratios_weighted.begin() + feature_amounts_ratios_weighted.size()
    );
  }
  TransformationModelLinear::TransformationModelLinear(
    const TransformationModel::DataPoints & data, const Param & params)
  {
    params_ = params;
    data_given_ = !data.empty();
    if (!data_given_ && params.exists("slope") && (params.exists("intercept")))
    {
      // don't estimate parameters, use given values
      slope_ = params.getValue("slope");
      intercept_ = params.getValue("intercept");
    }
    else     // estimate parameters from data
    {
      Param defaults;
      getDefaultParameters(defaults);
      params_.setDefaults(defaults);
      symmetric_ = params_.getValue("symmetric_regression") == "true";

      size_t size = data.size();
      if (size == 0)       // no data
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                         "no data points for 'linear' model");
      }
      else if (size == 1)       // degenerate case, but we can still do something
      {
        slope_ = 1.0;
        intercept_ = data[0].second - data[0].first;
      }
      else       // compute least-squares fit
      {
        vector<double> x(size), y(size);
        for (size_t i = 0; i < size; ++i)
        {
          if (symmetric_)
          {
            x[i] = data[i].second + data[i].first;
            y[i] = data[i].second - data[i].first;
          }
          else
          {
            x[i] = data[i].first;
            y[i] = data[i].second;
          }
        }
        double cov00, cov01, cov11, sumsq;         // covariance values, sum of squares
        double * x_start = &(x[0]), * y_start = &(y[0]);
        gsl_fit_linear(x_start, 1, y_start, 1, size, &intercept_, &slope_,
                       &cov00, &cov01, &cov11, &sumsq);

        if (symmetric_)         // undo coordinate transformation:
        {
          slope_ = (1.0 + slope_) / (1.0 - slope_);
          intercept_ = intercept_ * 1.41421356237309504880;           // 1.41... = sqrt(2)
        }
      }
    }
  }
  TransformationModelLinear::TransformationModelLinear(const TransformationModel::DataPoints& data, const Param& params)
  {
    params_ = params;
    data_given_ = !data.empty();
    if (!data_given_ && params.exists("slope") && (params.exists("intercept")))
    {
      // don't estimate parameters, use given values
      slope_ = params.getValue("slope");
      intercept_ = params.getValue("intercept");
    }
    else // estimate parameters from data
    {
      Param defaults;
      getDefaultParameters(defaults);
      params_.setDefaults(defaults);
      symmetric_ = params_.getValue("symmetric_regression") == "true";

      size_t size = data.size();
      std::vector<Wm5::Vector2d> points;
      if (size == 0) // no data
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                         "no data points for 'linear' model");
      }
      else if (size == 1) // degenerate case, but we can still do something
      {
        slope_ = 1.0;
        intercept_ = data[0].second - data[0].first;
      }
      else // compute least-squares fit
      {
        for (size_t i = 0; i < size; ++i)
        {
          points.push_back(Wm5::Vector2d(data[i].first, data[i].second));
        }
        if (!Wm5::HeightLineFit2<double>(static_cast<int>(size), &points.front(), slope_, intercept_))
        {
          throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "TransformationModelLinear", "Unable to fit linear transformation to data points.");
        }
      }
      // update params
      params_.setValue("slope", slope_);
      params_.setValue("intercept", intercept_);
    }
  }
  TransformationModelLowess::TransformationModelLowess(
      const TransformationModel::DataPoints& data_,
      const Param& params) : model_(0)
  {
    // parameter handling/checking:
    params_ = params;
    Param defaults;
    getDefaultParameters(defaults);
    params_.setDefaults(defaults);

    if (data_.size() < 2)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                       "'lowess' model requires more data");
    }

    // TODO copy ... 
    TransformationModel::DataPoints data(data_);

    // sort data
    std::sort(data.begin(), data.end(), cmpFirstDimension);

    vector<double> x(data.size()), y(data.size()), result(data.size());
    double xmin_ = data[0].first;
    double xmax_ = xmin_;
    for (Size i = 0; i < data.size(); ++i)
    {
      x[i] = data[i].first;
      y[i] = data[i].second;
      if (x[i] < xmin_) 
      {
        xmin_ = x[i];
      }
      else if (x[i] > xmax_)
      {
        xmax_ = x[i];
      }
    }

    double span = params_.getValue("span");
    int nsteps = params_.getValue("num_iterations");
    double delta = params_.getValue("delta");
    
    if (delta < 0.0)
    {
      delta = (xmax_ - xmin_) * 0.01; // automatically determine delta
    }

    FastLowessSmoothing::lowess(x, y, span, nsteps, delta, result);

    TransformationModel::DataPoints data_out;
    for (Size i = 0; i < result.size(); ++i)
    {
      data_out.push_back( std::make_pair(x[i], result[i]) );
    }

    // TODO thin out data here ? we may not need that many points here to interpolate ...  it is enough if we store a few datapoints

    Param p;
    TransformationModelInterpolated::getDefaultParameters(p);
    /// p.setValue("interpolation_type", "cspline"); // linear interpolation between lowess pts
    /// p.setValue("extrapolation_type", "four-point-linear");
    p.setValue("interpolation_type", params_.getValue("interpolation_type"));
    p.setValue("extrapolation_type", params_.getValue("extrapolation_type"));

    // create new interpolation model based on the lowess data
    model_ = new TransformationModelInterpolated(data_out, p);
  }
  TransformationModelBSpline::TransformationModelBSpline(
    const TransformationModel::DataPoints & data, const Param & params)
  {
    params_ = params;
    Param defaults;
    getDefaultParameters(defaults);
    params_.setDefaults(defaults);

    if (data.size() < 4)     // TODO: check number
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "'b_spline' model needs at least four data points");
    }
    Size num_breakpoints = params_.getValue("num_breakpoints");
    String break_positions = params_.getValue("break_positions");
    if ((break_positions != "uniform") && (break_positions != "quantiles"))
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "parameter 'break_positions' for 'b_spline' model must be 'uniform' or 'quantiles'");
    }

    size_ = data.size();
    x_ = gsl_vector_alloc(size_);
    y_ = gsl_vector_alloc(size_);
    w_ = gsl_vector_alloc(size_);
    for (size_t i = 0; i < size_; ++i)
    {
      gsl_vector_set(x_, i, data[i].first);
      gsl_vector_set(y_, i, data[i].second);
      gsl_vector_set(w_, i, 1.0);       // TODO: non-uniform weights
    }
    gsl_vector_minmax(x_, &xmin_, &xmax_);

    // set up cubic (k = 4) spline workspace:
    if (num_breakpoints < 2)
    {
      num_breakpoints = 2;
      LOG_WARN << "Warning: Increased parameter 'num_breakpoints' to 2 (minimum)." << endl;
    }
    else if (num_breakpoints > size_ - 2)
    {
      num_breakpoints = size_ - 2;
      LOG_WARN << "Warning: Decreased parameter 'num_breakpoints' to " + String(num_breakpoints) + " (maximum for this number of data points)." << endl;
    }
    workspace_ = gsl_bspline_alloc(4, num_breakpoints);
    if (break_positions == "uniform")
    {
      gsl_bspline_knots_uniform(xmin_, xmax_, workspace_);
    }
    else
    {
      vector<double> quantiles(num_breakpoints, 1.0);
      double step = 1.0 / (num_breakpoints - 1);
      for (Size i = 0; i < num_breakpoints - 1; ++i)
      {
        quantiles[i] = i * step;
      }
      gsl_vector * breakpoints;
      breakpoints = gsl_vector_alloc(num_breakpoints);
      getQuantiles_(x_, quantiles, breakpoints);
      gsl_bspline_knots(breakpoints, workspace_);
      gsl_vector_free(breakpoints);
    }
    ncoeffs_ = gsl_bspline_ncoeffs(workspace_);
    gsl_vector_minmax(workspace_->knots, &xmin_, &xmax_);
    computeFit_();
  }