void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name)
  {
    checkReferenceIds_(ref_ids);

    calibrated_feature_map = feature_map;
    // clear the ids
    for (Size f = 0; f < calibrated_feature_map.size(); ++f)
    {
      calibrated_feature_map[f].getPeptideIdentifications().clear();
    }

    // map the reference ids onto the features
    IDMapper mapper;
    Param param;
    param.setValue("rt_tolerance", (DoubleReal)param_.getValue("rt_tolerance"));
    param.setValue("mz_tolerance", param_.getValue("mz_tolerance"));
    param.setValue("mz_measure", param_.getValue("mz_tolerance_unit"));
    mapper.setParameters(param);
    std::vector<ProteinIdentification> vec;
    mapper.annotate(calibrated_feature_map, ref_ids, vec);

    // calibrate
    calibrateMapGlobally(calibrated_feature_map, calibrated_feature_map, trafo_file_name);

    // copy the old ids
    calibrated_feature_map.setUnassignedPeptideIdentifications(feature_map.getUnassignedPeptideIdentifications());
    for (Size f = 0; f < feature_map.size(); ++f)
    {
      calibrated_feature_map[f].getPeptideIdentifications().clear();
      if (!feature_map[f].getPeptideIdentifications().empty())
      {
        calibrated_feature_map[f].setPeptideIdentifications(feature_map[f].getPeptideIdentifications());
      }
    }
  }
 /// Clears @p params and fills it with the default settings of the B-spline model
 /// ("num_breakpoints" with a lower bound of 2, and "break_positions").
 void TransformationModelBSpline::getDefaultParameters(Param & params)
 {
   const char * const breakpoints_doc = "Number of breakpoints of the cubic spline in the smoothing step. More breakpoints mean less smoothing. Reduce this number if the transformation has an unexpected shape.";
   const char * const positions_doc = "How to distribute the breakpoints on the retention time scale. 'uniform': intervals of equal size; 'quantiles': equal number of data points per interval.";

   params.clear();

   // number of spline breakpoints
   params.setValue("num_breakpoints", 5, breakpoints_doc);
   params.setMinInt("num_breakpoints", 2);

   // placement strategy of the breakpoints
   params.setValue("break_positions", "uniform", positions_doc);
   params.setValidStrings("break_positions", StringList::create("uniform,quantiles"));
 }
  /**
    @brief Scores the similarity of two peak spectra based on their peak alignment.

    The spectra are aligned with a SpectrumAlignment configured from this
    object's 'tolerance' and 'is_relative_tolerance' parameters. For every
    aligned peak pair, sqrt(intensity1 * intensity2 * factor) is accumulated,
    where factor is 1 unless 'use_linear_factor' or 'use_gaussian_factor' is
    set (then it comes from getFactor_()). The accumulated sum is divided by
    sqrt(sum of squared intensities of s1 * sum of squared intensities of s2).

    @param s1 First spectrum.
    @param s2 Second spectrum.
    @return The normalized score. If the normalization term is zero (for
            example because one spectrum contains no peaks) 0 is returned
            instead of the NaN that the division 0/0 would produce.
  */
  double SpectrumAlignmentScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const
  {
    const double tolerance = (double)param_.getValue("tolerance");
    const bool is_relative_tolerance = param_.getValue("is_relative_tolerance").toBool();
    const bool use_linear_factor = param_.getValue("use_linear_factor").toBool();
    const bool use_gaussian_factor = param_.getValue("use_gaussian_factor").toBool();

    if (use_linear_factor && use_gaussian_factor)
    {
      cerr << "Warning: SpectrumAlignmentScore, use either 'use_linear_factor' or 'use_gaussian_factor'!" << endl;
    }

    // align the two spectra with the same tolerance settings as this score
    SpectrumAlignment aligner;
    Param p;
    p.setValue("tolerance", tolerance);
    p.setValue("is_relative_tolerance", (String)param_.getValue("is_relative_tolerance"));
    aligner.setParameters(p);

    vector<pair<Size, Size> > alignment;
    aligner.getSpectrumAlignment(alignment, s1, s2);

    // sums of squared intensities, used for normalization
    double sum(0), sum1(0), sum2(0);
    for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1)
    {
      sum1 += it1->getIntensity() * it1->getIntensity();
    }

    for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2)
    {
      sum2 += it2->getIntensity() * it2->getIntensity();
    }

    for (vector<pair<Size, Size> >::const_iterator it = alignment.begin(); it != alignment.end(); ++it)
    {
      double mz_tolerance(tolerance);

      if (is_relative_tolerance)
      {
        // tolerance is given in ppm of the m/z of the peak in s1
        mz_tolerance = mz_tolerance * s1[it->first].getPosition()[0] / 1e6;
      }

      double mz_difference(fabs(s1[it->first].getPosition()[0] - s2[it->second].getPosition()[0]));
      double factor = 1.0;

      if (use_linear_factor || use_gaussian_factor)
      {
        factor = getFactor_(mz_tolerance, mz_difference, use_gaussian_factor);
      }
      sum += sqrt(s1[it->first].getIntensity() * s2[it->second].getIntensity() * factor);
    }

    // Guard against 0/0: without any intensity in one of the spectra the
    // normalization term vanishes and the plain division would yield NaN.
    const double normalization = sqrt(sum1 * sum2);
    if (normalization == 0.0)
    {
      return 0.0;
    }

    return sum / normalization;
  }
示例#4
0
  /// Clears @p params and registers the defaults for "integration_type",
  /// "baseline_type" and "fit_EMG", each together with its list of valid strings.
  void PeakIntegrator::getDefaultParameters(Param& params)
  {
    const char* const integration_doc = "The integration technique to use in integratePeak() and estimateBackground() which uses either the summed intensity, integration by Simpson's rule or trapezoidal integration.";
    const char* const baseline_doc = "The baseline type to use in estimateBackground() based on the peak boundaries. A rectangular baseline shape is computed based either on the minimal intensity of the peak boundaries, the maximum intensity or the average intensity (base_to_base).";
    const char* const emg_doc = "Fit the chromatogram/spectrum to the EMG peak model.";

    params.clear();

    // how the peak area is computed
    params.setValue("integration_type", INTEGRATION_TYPE_INTENSITYSUM, integration_doc);
    params.setValidStrings("integration_type", ListUtils::create<String>("intensity_sum,simpson,trapezoid"));

    // how the background below the peak is estimated
    params.setValue("baseline_type", BASELINE_TYPE_BASETOBASE, baseline_doc);
    params.setValidStrings("baseline_type", ListUtils::create<String>("base_to_base,vertical_division,vertical_division_min,vertical_division_max"));

    // optional EMG model fit
    params.setValue("fit_EMG", "false", emg_doc);
    params.setValidStrings("fit_EMG", ListUtils::create<String>("false,true"));
  }
/// Runs a DigestSimulation (model "naive", zero missed cleavages) on every
/// feature map of @p feature_maps; the effective parameters are printed to
/// std::cout before digesting.
void digestFeaturesMapSimVector_(SimTypes::FeatureMapSimVector& feature_maps)
{
  Param digest_settings;
  digest_settings.setValue("model", "naive");
  digest_settings.setValue("model_naive:missed_cleavages", 0);

  DigestSimulation digester;
  digester.setParameters(digest_settings);
  std::cout << digester.getParameters() << std::endl;

  SimTypes::FeatureMapSimVector::iterator current = feature_maps.begin();
  while (current != feature_maps.end())
  {
    digester.digest(*current);
    ++current;
  }
}
  /// Tool entry point: loads spectra and identifications, maps the ids onto
  /// the spectra and trains an SvmTheoreticalSpectrumGeneratorTrainer model
  /// that is written to the 'model_output_file' option.
  ExitCodes main_(int, const char **)
  {
    // ---- read command line options ----
    const String spectra_file = getStringOption_("in_spectra");
    const String identification_file = getStringOption_("in_identifications");
    const String model_file = getStringOption_("model_output_file");
    const Int precursor_charge = getIntOption_("precursor_charge");

    // ---- set up the trainer from the "algorithm:" subsection ----
    SvmTheoreticalSpectrumGeneratorTrainer trainer;

    Param trainer_settings = getParam_().copy("algorithm:", true);
    const String write_files = getFlag_("write_training_files") ? "true" : "false";
    trainer_settings.setValue("write_training_files", write_files);
    trainer.setParameters(trainer_settings);

    // ---- load spectra and identifications ----
    PeakMap spectra;
    MzMLFile().load(spectra_file, spectra);

    std::vector<PeptideIdentification> peptide_ids;
    std::vector<ProteinIdentification> protein_ids;
    String document_id;
    IdXMLFile().load(identification_file, protein_ids, peptide_ids, document_id);

    // ---- annotate the spectra with the identifications ----
    Param mapper_settings;
    mapper_settings.setValue("rt_tolerance", 0.001);
    mapper_settings.setValue("mz_tolerance", 0.001);

    IDMapper id_mapper;
    id_mapper.setParameters(mapper_settings);
    id_mapper.annotate(spectra, peptide_ids, protein_ids);

    // ---- collect one annotation (sequence of the first hit) per spectrum ----
    // NOTE(review): this indexes [0] of both the identifications and the hits
    // without checking; it assumes every spectrum received at least one
    // identification with at least one hit -- confirm against the input data.
    std::vector<AASequence> annotations;
    for (PeakMap::iterator spectrum = spectra.begin(); spectrum != spectra.end(); ++spectrum)
    {
      annotations.push_back(spectrum->getPeptideIdentifications()[0].getHits()[0].getSequence());
    }

    trainer.trainModel(spectra, annotations, model_file, precursor_charge);
    return EXECUTION_OK;
  }
示例#7
0
 /// Returns the default parameters of a subsection: "param1"/"param2" for the
 /// section named "algorithm", "param3"/"param4" for every other section.
 Param getSubsectionDefaults_(const String & section) const
 {
   Param defaults;
   if (section != "algorithm")
   {
     defaults.setValue("param3", "param3_value", "param3_description");
     defaults.setValue("param4", "param4_value", "param4_description");
     return defaults;
   }

   defaults.setValue("param1", "param1_value", "param1_description");
   defaults.setValue("param2", "param2_value", "param2_description");
   return defaults;
 }
  /**
    @brief Smooths the intensities of a mass trace with a Savitzky-Golay filter.

    The peaks of @p mt are copied into a temporary spectrum, filtered with a
    second-order Savitzky-Golay filter and the filtered intensities are stored
    on the trace via setSmoothedIntensities().

    @param mt       Mass trace to smooth (receives the smoothed intensities).
    @param win_size Requested frame length of the filter; values below 3 are raised to 3.
  */
  void ElutionPeakDetection::smoothData(MassTrace& mt, int win_size) const
  {
    // Savitzky-Golay is used instead of lowess smoothing:
    // looking at the unit test, this method gives better fits than lowess smoothing
    // (the reference paper uses lowess smoothing).

    // the filter operates on spectra, so wrap the trace's peaks in one
    MSSpectrum<PeakType> trace_spectrum;
    trace_spectrum.insert(trace_spectrum.begin(), mt.begin(), mt.end());

    Param sg_settings;
    sg_settings.setValue("polynomial_order", 2);
    // frame length must be at least polynomial_order+1, otherwise SG will fail
    sg_settings.setValue("frame_length", std::max(3, win_size));

    SavitzkyGolayFilter sg_filter;
    sg_filter.setParameters(sg_settings);
    sg_filter.filter(trace_spectrum);

    // hand the filtered intensities back to the trace
    std::vector<double> filtered_intensities;
    for (MSSpectrum<PeakType>::iterator peak = trace_spectrum.begin(); peak != trace_spectrum.end(); ++peak)
    {
      filtered_intensities.push_back(peak->getIntensity());
    }
    mt.setSmoothedIntensities(filtered_intensities);

    // ---- disabled alternative: lowess smoothing (kept for reference) ----
    //
    // if there is no previous FWHM estimation... do it now
    //    if (win_size == 0)
    //    {
    //        mt.estimateFWHM(false); // estimate FWHM
    //        win_size = mt.getFWHMScansNum();
    //    }
    //
    // use one global window size for all mass traces to smooth
    //  std::vector<double> rts, ints;
    //
    //  for (MassTrace::const_iterator c_it = mt.begin(); c_it != mt.end(); ++c_it)
    //  {
    //      rts.push_back(c_it->getRT());
    //      ints.push_back(c_it->getIntensity());
    //  }
    //  LowessSmoothing lowess_smooth;
    //  Param lowess_params;
    //  lowess_params.setValue("window_size", win_size);
    //  lowess_smooth.setParameters(lowess_params);
    //  std::vector<double> smoothed_data;
    //  lowess_smooth.smoothData(rts, ints, smoothed_data);
    //  mt.setSmoothedIntensities(smoothed_data);
  }
示例#9
0
文件: File.C 项目: BioITer/OpenMS
  /// Builds the default system parameters: "version" (current library
  /// version), empty "home_dir" and "temp_dir", an "id_db_dir" list holding a
  /// single empty entry, and "threads" set to 1.
  Param File::getSystemParameterDefaults_()
  {
    // description of "id_db_dir", assembled once up front
    const String id_db_dir_doc = String("Default directory for FASTA and psq files used as databased for id engines. ")
                                 + "This allows you to specify just the filename of the DB in the "
                                 + "respective TOPP tool, and the database will be searched in the directories specified here "
                                 + "";

    Param defaults;
    defaults.setValue("version", VersionInfo::getVersion());
    // the following entries are only active when the user enters a value
    defaults.setValue("home_dir", "");
    defaults.setValue("temp_dir", "");
    defaults.setValue("id_db_dir", ListUtils::create<String>(""), id_db_dir_doc);
    defaults.setValue("threads", 1);
    // TODO: maybe we add -log, -debug.... or....

    return defaults;
  }
示例#10
0
  /**
    @brief Returns this object's parameters extended by the defaults of all registered labelers.

    For every product registered at Factory<BaseLabeler> the labeler's default
    parameters are inserted below "Labeling:<name>:"; "Labeling:type" lists all
    product names as valid strings (default "labelfree").

    @throws Exception::InvalidValue if the factory returns a null labeler for a registered name.
  */
  Param MSSim::getParameters() const
  {
    Param combined;
    combined.insert("", this->param_); // get non-labeling options

    vector<String> labeler_names = Factory<BaseLabeler>::registeredProducts();

    combined.setValue("Labeling:type", "labelfree", "Select the labeling type you want for your experiment");
    combined.setValidStrings("Labeling:type", labeler_names);

    for (vector<String>::iterator name_it = labeler_names.begin(); name_it != labeler_names.end(); ++name_it)
    {
      BaseLabeler* labeler = Factory<BaseLabeler>::create(*name_it);
      if (!labeler)
      {
        throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__, "This labeler returned by the Factory is invalid!", name_it->c_str()); 
      }

      const String section = "Labeling:" + *name_it;
      combined.insert(section + ":", labeler->getDefaultParameters());

      // if parameters of labeler are empty, the section will not exist and
      // setting its description would fail
      const bool section_exists = !combined.copy(section).empty();
      if (section_exists)
      {
        combined.setSectionDescription(section, labeler->getDescription());
      }
      delete(labeler);
    }

    return combined;
  }
示例#11
0
int main(int argc, const char** argv)
{
  if (argc < 2) return 1;
  // the path to the data should be given on the command line
  String tutorial_data_path(argv[1]);
  
  PeakMap exp_raw;
  PeakMap exp_picked;

  MzMLFile mzml_file;
  mzml_file.load(tutorial_data_path + "/data/Tutorial_PeakPickerCWT.mzML", exp_raw);

  PeakPickerCWT pp;
  Param param;
  param.setValue("peak_width", 0.1);
  pp.setParameters(param);

  pp.pickExperiment(exp_raw, exp_picked);
  exp_picked.updateRanges();

  cout << "\nMinimal fwhm of a mass spectrometric peak: " << (DoubleReal)param.getValue("peak_width")
       << "\n\nNumber of picked peaks " << exp_picked.getSize() << std::endl;

  return 0;
} //end of main
示例#12
0
/// For SWATH: computes the theoretical b- and y-ion m/z values of a sequence.
///
/// A theoretical spectrum with b and y ions of the given charge is generated;
/// each peak whose "IonName" meta value starts with 'y' is appended to
/// @p yseries, each one starting with 'b' to @p bseries.
///
/// @param a       Peptide sequence.
/// @param bseries Output: m/z values of the b ions (appended).
/// @param yseries Output: m/z values of the y ions (appended).
/// @param charge  Charge of the generated ions (must be > 0).
void getBYSeries(AASequence& a, //
                 std::vector<double>& bseries, //
                 std::vector<double>& yseries, //
                 UInt charge //
                )
{
    OPENMS_PRECONDITION(charge > 0, "Charge is a positive integer");

    Param generator_settings;
    generator_settings.setValue("add_metainfo", "true",
                                "Adds the type of peaks as metainfo to the peaks, like y8+, [M-H2O+2H]++");

    TheoreticalSpectrumGenerator generator;
    generator.setParameters(generator_settings);

    RichPeakSpectrum theoretical;
    generator.addPeaks(theoretical, a, Residue::BIon, charge);
    generator.addPeaks(theoretical, a, Residue::YIon, charge);

    // sort the peaks into the two series by the first letter of their ion name
    for (RichPeakSpectrum::iterator peak = theoretical.begin();
            peak != theoretical.end(); ++peak)
    {
        const char series_tag = peak->getMetaValue("IonName").toString()[0];
        switch (series_tag)
        {
        case 'y':
            yseries.push_back(peak->getMZ());
            break;

        case 'b':
            bseries.push_back(peak->getMZ());
            break;

        default:
            break;
        }
    }
} // end getBYSeries
 /// Clears @p params and registers the single default of the linear model:
 /// "symmetric_regression" ("false", valid strings "true" and "false").
 void TransformationModelLinear::getDefaultParameters(Param& params)
 {
   const char* const symmetric_doc = "Perform linear regression on 'y - x' vs. 'y + x', instead of on 'y' vs. 'x'.";

   params.clear();
   params.setValue("symmetric_regression", "false", symmetric_doc);
   params.setValidStrings("symmetric_regression", ListUtils::create<String>("true,false"));
 }
示例#14
0
 /// Clears @p params and registers "interpolation_type" (default "cspline")
 /// restricted to "linear", "polynomial", "cspline" and "akima".
 void TransformationModelInterpolated::getDefaultParameters(Param & params)
 {
   params.clear();

   params.setValue("interpolation_type", "cspline", "Type of interpolation to apply.");
   params.setValidStrings("interpolation_type", StringList::create("linear,polynomial,cspline,akima"));
 }
示例#15
0
 /// Returns the subsection defaults of the ITRAQ analyzer: the parameters of
 /// an ItraqChannelExtractor under "Extraction:", those of an ItraqQuantifier
 /// under "Quantification:", plus the advanced entry "MetaInformation:Program".
 /// The section name is ignored.
 Param getSubsectionDefaults_(const String & /*section*/) const
 {
   Param defaults;

   // type is irrelevant - ini is the same
   const Param extraction_defaults = ItraqChannelExtractor(ItraqQuantifier::FOURPLEX).getParameters();
   defaults.insert("Extraction:", extraction_defaults);

   // type is irrelevant - ini is the same
   const Param quantification_defaults = ItraqQuantifier(ItraqQuantifier::FOURPLEX).getParameters();
   defaults.insert("Quantification:", quantification_defaults);

   defaults.setValue("MetaInformation:Program", "OpenMS::ITRAQAnalyzer", "", StringList::create("advanced"));
   return defaults;
 }
 /// Returns the subsection defaults of the TMT analyzer: the parameters of an
 /// ItraqChannelExtractor under "Extraction:", those of an ItraqQuantifier
 /// under "Quantification:" (both created for TMT_SIXPLEX), plus the advanced
 /// entry "MetaInformation:Program". The section name is ignored.
 Param getSubsectionDefaults_(const String & /*section*/) const
 {
   Param defaults;

   const Param extraction_defaults = ItraqChannelExtractor(ItraqQuantifier::TMT_SIXPLEX).getParameters();
   defaults.insert("Extraction:", extraction_defaults);

   const Param quantification_defaults = ItraqQuantifier(ItraqQuantifier::TMT_SIXPLEX).getParameters();
   defaults.insert("Quantification:", quantification_defaults);

   defaults.setValue("MetaInformation:Program", "OpenMS::TMTAnalyzer", "", ListUtils::create<String>("advanced"));
   return defaults;
 }
  /**
    @brief Creates a 1D model for the given data points and fits its offset.

    A bounding box of the positions in @p set is computed and widened by
    sqrt(variance) * tolerance_stdev_box_. For charge 0 a "GaussModel" is
    created, otherwise an "IsotopeModel"; the model is then passed to
    fitOffset_().

    @param set   Data points to fit (the first element is accessed unconditionally).
    @param model Output: the newly created model.
    @return The quality reported by fitOffset_(), or -1.0 if that value is NaN.
  */
  IsotopeFitter1D::QualityType IsotopeFitter1D::fit1d(const RawDataArrayType& set, InterpolationModel*& model)
  {
    // bounding box of all positions
    max_ = set[0].getPos();
    min_ = max_;
    for (UInt idx = 1; idx < set.size(); ++idx)
    {
      const CoordinateType position = set[idx].getPos();
      if (position < min_)
      {
        min_ = position;
      }
      if (position > max_)
      {
        max_ = position;
      }
    }

    // widen the box by a few multiples of the standard deviation
    stdev1_ = sqrt(statistics_.variance()) * tolerance_stdev_box_;
    min_ -= stdev1_;
    max_ += stdev1_;

    if (charge_ != 0)
    {
      // charged: isotope model, seeded with the "isotope_model:" subsection (without "stdev")
      model = static_cast<InterpolationModel*>(Factory<BaseModel<1> >::create("IsotopeModel"));

      Param subsection = this->param_.copy("isotope_model:", true);
      subsection.removeAll("stdev");
      model->setParameters(subsection);
      model->setInterpolationStep(interpolation_step_);

      Param model_settings;
      model_settings.setValue("statistics:mean", statistics_.mean());
      model_settings.setValue("charge", static_cast<Int>(charge_));
      model_settings.setValue("isotope:mode:GaussianSD", isotope_stdev_);
      model_settings.setValue("isotope:maximum", max_isotope_);
      model->setParameters(model_settings);

      IsotopeModel* isotope_model = static_cast<IsotopeModel*>(model);
      isotope_model->setSamples(isotope_model->getFormula());
    }
    else
    {
      // uncharged: plain Gaussian model
      model = static_cast<InterpolationModel*>(Factory<BaseModel<1> >::create("GaussModel"));
      model->setInterpolationStep(interpolation_step_);

      Param model_settings;
      model_settings.setValue("bounding_box:min", min_);
      model_settings.setValue("bounding_box:max", max_);
      model_settings.setValue("statistics:variance", statistics_.variance());
      model_settings.setValue("statistics:mean", statistics_.mean());
      model->setParameters(model_settings);
    }

    // fit offset; a NaN quality is reported as -1
    QualityType fit_quality = fitOffset_(model, set, stdev1_, stdev1_, interpolation_step_);
    if (boost::math::isnan(fit_quality))
    {
      fit_quality = -1.0;
    }
    return fit_quality;
  }
示例#18
0
文件: File.C 项目: BioITer/OpenMS
  /**
    @brief Loads the per-user system parameters from '.OpenMS/OpenMS.ini' in the user's home directory.

    - If the file is not readable, the defaults from getSystemParameterDefaults_()
      are returned; the folder and the file are created when possible (a warning
      is logged and the defaults are returned without storing otherwise).
    - If the file is readable but has no or a different "version" entry, the
      defaults are updated with the values from the file, the merged parameters
      are written back to the file and returned.
    - Otherwise the parameters are returned as loaded.

    @return The system parameters as described above.
  */
  Param File::getSystemParameters()
  {
    String filename = String(QDir::homePath()) + "/.OpenMS/OpenMS.ini";
    Param p;
    if (!File::readable(filename)) // create file
    {
      p = getSystemParameterDefaults_();

      String dirname = String(QDir::homePath()) + "/.OpenMS";
      QDir dir(dirname.toQString());
      if (!dir.exists())
      {
        if (!File::writable(dirname))
        {
          LOG_WARN << "Warning: Cannot create folder '.OpenMS' in user home directory. Please check your environment!" << std::endl;
          LOG_WARN << "         Home directory determined is: " << QDir::homePath().toStdString() << "." << std::endl;
          return p;
        }
        dir.mkpath(".");
      }

      if (!File::writable(filename))
      {
        LOG_WARN << "Warning: Cannot create '.OpenMS/OpenMS.ini' in user home directory. Please check your environment!" << std::endl;
        LOG_WARN << "         Home directory determined is: " << QDir::homePath().toStdString() << "." << std::endl;
        return p;
      }

      ParamXMLFile paramFile;
      paramFile.store(filename, p);
    }
    else
    {
      ParamXMLFile paramFile;
      paramFile.load(filename, p);

      // check version
      if (!p.exists("version") || (p.getValue("version") != VersionInfo::getVersion()))
      {
        if (!p.exists("version"))
        {
          LOG_WARN << "Broken file '" << filename << "' discovered. The 'version' tag is missing." << std::endl;
        }
        else // old version
        {
          LOG_WARN << "File '" << filename << "' is deprecated." << std::endl;
        }
        LOG_WARN << "Updating missing/wrong entries in '" << filename << "' with defaults!" << std::endl;
        Param p_new = getSystemParameterDefaults_();
        p.setValue("version", VersionInfo::getVersion()); // update old version, such that p_new:version does not get overwritten during update()
        p_new.update(p);

        paramFile.store(filename, p_new);

        // Return exactly what was written to disk. Previously the outdated
        // parameters were returned here, so this call could lack entries that
        // every later call (which reloads the stored file) would contain.
        p = p_new;
      }
    }
    return p;
  }
  /// Clears @p params and registers the defaults of the lowess model:
  /// "span" (2/3, limited to [0, 1]), "num_iterations" (3, at least 0),
  /// "delta" (-1.0), "interpolation_type" ("cspline") and
  /// "extrapolation_type" ("four-point-linear").
  void TransformationModelLowess::getDefaultParameters(Param& params)
  {
    const char* const span_doc = "Fraction of datapoints (f) to use for each local regression (determines the amount of smoothing). Choosing this parameter in the range .2 to .8 usually results in a good fit.";
    const char* const iterations_doc = "Number of rubstifying iterations for lowess fitting.";
    const char* const delta_doc = "Nonnegative parameter which may be used to save computations (recommended value is 0.01 of the range of the input, e.g. for data ranging from 1000 seconds to 2000 seconds, it could be set to 10). Setting a negative value will automatically do this.";
    const char* const interpolation_doc = "Method to use for interpolation between datapoints computed by lowess. 'linear': Linear interpolation. 'cspline': Use the cubic spline for interpolation. 'akima': Use an akima spline for interpolation";
    const char* const extrapolation_doc = "Method to use for extrapolation outside the data range. 'two-point-linear': Uses a line through the first and last point to extrapolate. 'four-point-linear': Uses a line through the first and second point to extrapolate in front and and a line through the last and second-to-last point in the end. 'global-linear': Uses a linear regression to fit a line through all data points and use it for interpolation.";

    params.clear();

    // lowess fit itself
    params.setValue("span", 2/3.0, span_doc);
    params.setMinFloat("span", 0.0);
    params.setMaxFloat("span", 1.0);

    params.setValue("num_iterations", 3, iterations_doc);
    params.setMinInt("num_iterations", 0);

    params.setValue("delta", -1.0, delta_doc);

    // behaviour between the fitted points
    params.setValue("interpolation_type", "cspline", interpolation_doc);
    params.setValidStrings("interpolation_type", ListUtils::create<String>("linear,cspline,akima"));

    // behaviour outside the data range
    params.setValue("extrapolation_type", "four-point-linear", extrapolation_doc);
    StringList extrapolation_choices = ListUtils::create<String>("two-point-linear,four-point-linear,global-linear");
    params.setValidStrings("extrapolation_type", extrapolation_choices);
  }
示例#20
0
/*! Converts this \l{RuleAction} to a normal \l{Action}.
 *  \sa Action, */
Action RuleAction::toAction() const
{
    Action action(m_actionTypeId, m_deviceId);
    ParamList params;
    foreach (const RuleActionParam &ruleActionParam, m_ruleActionParams) {
        Param param;
        param.setName(ruleActionParam.name());
        param.setValue(ruleActionParam.value());
        params.append(param);
    }
示例#21
0
// Applies this SET/RESET PARAM command to the param list of sqlci_env.
// Three cases are distinguished:
//   - get_arglen() == -1 : the named param is set to the null value
//   - no argument        : RESET (all params if param_name is null, else the named one)
//   - otherwise          : SET the named param from this command
// Always returns 0.
short SetParam::process(SqlciEnv * sqlci_env)
{
  if (get_arglen() == -1)
    {
      // set param to null value
      Param * param = sqlci_env->get_paramlist()->get(param_name);
      if (param)
	param->makeNull();
      else
	{
	  // param does not exist yet: create it, register it, then null it
	  param = new Param(param_name, (char*)0);
	  
	  sqlci_env->get_paramlist()->append(param);
	  param->makeNull();
	}
    }
  else
    if (!get_argument())
    {
      /* RESET PARAM command */
      if (!param_name)
	{
	  /* RESET all params */
	  // NOTE(review): the list is advanced with getNext() after the current
	  // entry was removed; this assumes the list's cursor stays valid
	  // across remove() -- confirm against the list implementation.
	  Param * param = sqlci_env->get_paramlist()->getFirst();
	  while (param)
	    {
	      // the name is read before the object is deleted
	      sqlci_env->get_paramlist()->remove(param->getName());
	      delete param;
	      param = sqlci_env->get_paramlist()->getNext();
	    }
	}
      else
	{
	  // reset a single param: the result of get() is not checked here;
	  // deleting a null pointer is a no-op in C++
	  Param * param = sqlci_env->get_paramlist()->get(param_name);
	  sqlci_env->get_paramlist()->remove(param_name);
	  delete param;
	 }
    } 
  else
    {
      /* SET PARAM command */
      // update the existing param in place, or create and register a new one
      Param * param = sqlci_env->get_paramlist()->get(param_name);
      if (param) {
	param->setValue(this);
      } else
	{
	  param = new Param(param_name, this);
	  
	  sqlci_env->get_paramlist()->append(param);
	}
    }
  
  return 0;
}
// Test of ConsensusIDAlgorithmAverage::apply(): runs the algorithm on a copy
// of 'ids' and checks rank, sequence and score of the seven resulting hits.
// 'ids' and 'hits' are declared outside of this section.
START_SECTION(void apply(std::vector<PeptideIdentification>& ids))
{
  TOLERANCE_ABSOLUTE(0.01)

  ConsensusIDAlgorithmAverage consensus;
  // define parameters:
  Param param;
  param.setValue("filter:considered_hits", 5);
  consensus.setParameters(param);
  // apply:
  // (on a copy, so that 'ids' itself stays unchanged for the check at the end)
  vector<PeptideIdentification> f = ids;
  consensus.apply(f);

  // a single identification with seven hits is expected
  TEST_EQUAL(f.size(), 1);
  hits = f[0].getHits();
  TEST_EQUAL(hits.size(), 7);

  TEST_EQUAL(hits[0].getRank(), 1);
  TEST_EQUAL(hits[0].getSequence(), AASequence::fromString("F"));
  TEST_REAL_SIMILAR(hits[0].getScore(), 0.0);

  // the two "0.2" scores are not equal (due to floating-point number effects),
  // therefore the ranks of the hits differ:
  TEST_EQUAL(hits[1].getScore() < hits[2].getScore(), true);

  TEST_EQUAL(hits[1].getRank(), 2);
  TEST_EQUAL(hits[1].getSequence(), AASequence::fromString("C"));
  TEST_REAL_SIMILAR(hits[1].getScore(), 0.2);

  TEST_EQUAL(hits[2].getRank(), 3);
  TEST_EQUAL(hits[2].getSequence(), AASequence::fromString("G"));
  TEST_REAL_SIMILAR(hits[2].getScore(), 0.2);
  
  TEST_EQUAL(hits[3].getRank(), 4);
  TEST_EQUAL(hits[3].getSequence(), AASequence::fromString("A"));
  TEST_REAL_SIMILAR(hits[3].getScore(), 0.25);

  TEST_EQUAL(hits[4].getRank(), 5);
  TEST_EQUAL(hits[4].getSequence(), AASequence::fromString("D"));
  TEST_REAL_SIMILAR(hits[4].getScore(), 0.35);

  TEST_EQUAL(hits[5].getRank(), 6);
  TEST_EQUAL(hits[5].getSequence(), AASequence::fromString("B"));
  TEST_REAL_SIMILAR(hits[5].getScore(), 0.4);

  TEST_EQUAL(hits[6].getRank(), 7);
  TEST_EQUAL(hits[6].getSequence(), AASequence::fromString("E"));
  TEST_REAL_SIMILAR(hits[6].getScore(), 0.5);


  // after flagging ids[2] as "higher score is better", apply() on 'ids' is
  // expected to throw InvalidValue
  // NOTE(review): presumably because the score orientations of the input ids
  // are then inconsistent -- confirm against the algorithm's input checks
  ids[2].setHigherScoreBetter(true);
  TEST_EXCEPTION(Exception::InvalidValue, consensus.apply(ids));
}
示例#23
0
  /**
    @brief Creates a "BiGaussModel" for the given data points and fits its offset.

    The bounding box of the positions in @p set is widened on the lower side
    by sqrt(variance of statistics1_) * tolerance_stdev_box_ and on the upper
    side by the same expression using statistics2_.

    @param set   Data points to fit (the first element is accessed unconditionally).
    @param model Output: the newly created model.
    @return The quality reported by fitOffset_(), or -1.0 if that value is NaN.
  */
  BiGaussFitter1D::QualityType BiGaussFitter1D::fit1d(const RawDataArrayType& set, InterpolationModel*& model)
  {
    // bounding box of all positions
    CoordinateType box_min = set[0].getPos();
    CoordinateType box_max = set[0].getPos();
    for (UInt idx = 1; idx < set.size(); ++idx)
    {
      const CoordinateType position = set[idx].getPos();
      if (position < box_min)
      {
        box_min = position;
      }
      if (position > box_max)
      {
        box_max = position;
      }
    }

    // widen the box by a few multiples of the respective standard deviation
    const CoordinateType lower_margin = sqrt(statistics1_.variance()) * tolerance_stdev_box_;
    const CoordinateType upper_margin = sqrt(statistics2_.variance()) * tolerance_stdev_box_;
    box_min -= lower_margin;
    box_max += upper_margin;

    // create and configure the model
    model = static_cast<InterpolationModel*>(Factory<BaseModel<1> >::create("BiGaussModel"));
    model->setInterpolationStep(interpolation_step_);

    Param model_settings;
    model_settings.setValue("bounding_box:min", box_min);
    model_settings.setValue("bounding_box:max", box_max);
    model_settings.setValue("statistics:mean", statistics1_.mean());
    model_settings.setValue("statistics:variance1", statistics1_.variance());
    model_settings.setValue("statistics:variance2", statistics2_.variance());
    model->setParameters(model_settings);

    // fit offset; a NaN quality is reported as -1
    QualityType fit_quality = fitOffset_(model, set, lower_margin, upper_margin, interpolation_step_);
    if (boost::math::isnan(fit_quality))
    {
      fit_quality = -1.0;
    }
    return fit_quality;
  }
// Test of ConsensusIDAlgorithmRanks::apply(): runs the algorithm on a copy of
// 'ids' and checks rank, sequence and score of the seven resulting hits.
// 'ids' and 'hits' are declared outside of this section.
START_SECTION(void apply(std::vector<PeptideIdentification>& ids))
{
  TOLERANCE_ABSOLUTE(0.01)

  ConsensusIDAlgorithmRanks consensus;
  // define parameters:
  Param param;
  param.setValue("filter:considered_hits", 5);
  consensus.setParameters(param);
  // apply:
  // (on a copy, so that 'ids' itself stays unchanged)
  vector<PeptideIdentification> f = ids;
  consensus.apply(f);

  // a single identification with seven hits is expected
  TEST_EQUAL(f.size(), 1);
  hits = f[0].getHits();
  TEST_EQUAL(hits.size(), 7);

  // expected hits, listed by rank; the expected scores decrease with the rank
  TEST_EQUAL(hits[0].getRank(), 1);
  TEST_EQUAL(hits[0].getSequence(), AASequence::fromString("C"));
  TEST_REAL_SIMILAR(hits[0].getScore(), 0.8);

  TEST_EQUAL(hits[1].getRank(), 2);
  TEST_EQUAL(hits[1].getSequence(), AASequence::fromString("A"));
  TEST_REAL_SIMILAR(hits[1].getScore(), 0.6);

  TEST_EQUAL(hits[2].getRank(), 3);
  TEST_EQUAL(hits[2].getSequence(), AASequence::fromString("B"));
  TEST_REAL_SIMILAR(hits[2].getScore(), 0.5333);

  TEST_EQUAL(hits[3].getRank(), 4);
  TEST_EQUAL(hits[3].getSequence(), AASequence::fromString("F"));
  TEST_REAL_SIMILAR(hits[3].getScore(), 0.33333);

  TEST_EQUAL(hits[4].getRank(), 5);
  TEST_EQUAL(hits[4].getSequence(), AASequence::fromString("D"));
  TEST_REAL_SIMILAR(hits[4].getScore(), 0.26666);

  TEST_EQUAL(hits[5].getRank(), 6);
  TEST_EQUAL(hits[5].getSequence(), AASequence::fromString("G"));
  TEST_REAL_SIMILAR(hits[5].getScore(), 0.2);

  TEST_EQUAL(hits[6].getRank(), 7);
  TEST_EQUAL(hits[6].getSequence(), AASequence::fromString("E"));
  TEST_REAL_SIMILAR(hits[6].getScore(), 0.06666);
}
示例#25
0
/// For SWATH: collects the theoretical m/z values of a sequence.
///
/// A theoretical spectrum consisting of the b ions, the y ions and the
/// precursor peaks of the given charge is generated; the m/z value of every
/// peak is appended to @p masses.
///
/// @param a      Peptide sequence.
/// @param masses Output: m/z values of all generated peaks (appended).
/// @param charge Charge of the generated ions (must be > 0).
void getTheorMasses(AASequence& a, std::vector<double>& masses,
                    UInt charge)
{
    OPENMS_PRECONDITION(charge > 0, "Charge is a positive integer");

    Param generator_settings;
    generator_settings.setValue("add_metainfo", "true",
                                "Adds the type of peaks as metainfo to the peaks, like y8+, [M-H2O+2H]++");

    TheoreticalSpectrumGenerator generator;
    generator.setParameters(generator_settings);

    RichPeakSpectrum theoretical;
    generator.addPeaks(theoretical, a, Residue::BIon, charge);
    generator.addPeaks(theoretical, a, Residue::YIon, charge);
    generator.addPrecursorPeaks(theoretical, a, charge);

    RichPeakSpectrum::iterator peak = theoretical.begin();
    while (peak != theoretical.end())
    {
        masses.push_back(peak->getMZ());
        ++peak;
    }
} // end getTheorMasses
示例#26
0
 /// Clears @p params and registers the defaults of the linear model:
 /// "symmetric_regression", the weighting options "x_weight"/"y_weight"
 /// (default: empty string) and the datum limits "x_datum_min/max" and
 /// "y_datum_min/max".
 void TransformationModelLinear::getDefaultParameters(Param& params)
 {
   const char* const symmetric_doc = "Perform linear regression on 'y - x' vs. 'y + x', instead of on 'y' vs. 'x'.";

   params.clear();

   // regression mode
   params.setValue("symmetric_regression", "false", symmetric_doc);
   params.setValidStrings("symmetric_regression", ListUtils::create<String>("true,false"));

   // weighting of the data points (the lists end with a comma, as the default is "")
   params.setValue("x_weight", "", "Weight x values");
   params.setValidStrings("x_weight", ListUtils::create<String>("1/x,1/x2,ln(x),"));
   params.setValue("y_weight", "", "Weight y values");
   params.setValidStrings("y_weight", ListUtils::create<String>("1/y,1/y2,ln(y),"));

   // limits for the data values
   params.setValue("x_datum_min", 1e-15, "Minimum x value");
   params.setValue("x_datum_max", 1e15, "Maximum x value");
   params.setValue("y_datum_min", 1e-15, "Minimum y value");
   params.setValue("y_datum_max", 1e15, "Maximum y value");
 }
示例#27
0
// Applies this SET/RESET PATTERN command to the pattern list of sqlci_env.
//   - no argument : RESET (all patterns if pattern_name is null, else the named one)
//   - otherwise   : SET the named pattern to the argument
// Always returns 0.
short SetPattern::process(SqlciEnv * sqlci_env)
{
  if (!get_argument())
    {
      /* RESET PATTERN command */
      if (!pattern_name)
	{
	  /* RESET all patterns */
	  // NOTE(review): the list is advanced with getNext() after the current
	  // entry was removed; this assumes the list's cursor stays valid
	  // across remove() -- confirm against the list implementation.
	  Param * pattern = sqlci_env->get_patternlist()->getFirst();
	  while (pattern)
	    {
	      // the name is read before the object is deleted
	      sqlci_env->get_patternlist()->remove(pattern->getName());
	      delete pattern;
	      pattern = sqlci_env->get_patternlist()->getNext();
	    }
	}
      else
	{
	  // reset a single pattern: the result of get() is not checked here;
	  // deleting a null pointer is a no-op in C++
	  Param * pattern = sqlci_env->get_patternlist()->get(pattern_name);
	  sqlci_env->get_patternlist()->remove(pattern_name);
	  delete pattern;
	}
    } 
  else
    {
      /* SET PATTERN command */
      // update the existing pattern in place, or create and register a new one
      Param * pattern = sqlci_env->get_patternlist()->get(pattern_name);
      if (pattern)
      {
	pattern->setValue(get_argument(), sqlci_env->getTerminalCharset());
      }
      else
	{
	  pattern = new Param(pattern_name, get_argument());
	  
	  sqlci_env->get_patternlist()->append(pattern);
	}
    }
  
  return 0;
}
int main(int argc, const char** argv)
{
  if (argc < 2) return 1;

  // the path to the data should be given on the command line
  String tutorial_data_path(argv[1]);

  TOFCalibration ec;
  PeakMap exp_raw, calib_exp;
  MzMLFile mzml_file;
  mzml_file.load(tutorial_data_path + "/data/Tutorial_TOFCalibration_peak.mzML", calib_exp);
  mzml_file.load(tutorial_data_path + "/data/Tutorial_TOFCalibration_raw.mzML", exp_raw);

  vector<double> ref_masses;
  TextFile ref_file;
  ref_file.load(tutorial_data_path + "/data/Tutorial_TOFCalibration_masses.txt", true);
  for (TextFile::ConstIterator iter = ref_file.begin(); iter != ref_file.end(); ++iter)
  {
    ref_masses.push_back(String(iter->c_str()).toDouble());
  }

  std::vector<double> ml1;
  ml1.push_back(418327.924993827);

  std::vector<double> ml2;
  ml2.push_back(253.645187196031);

  std::vector<double> ml3;
  ml3.push_back(-0.0414243465397252);

  ec.setML1s(ml1);
  ec.setML2s(ml2);
  ec.setML3s(ml3);

  Param param;
  param.setValue("PeakPicker:peak_width", 0.1);
  ec.setParameters(param);
  ec.pickAndCalibrate(calib_exp, exp_raw, ref_masses);

  return 0;
} //end of main
  /// Builds a lowess-based transformation model.
  ///
  /// The input points are copied and sorted with cmpFirstDimension, smoothed
  /// with FastLowessSmoothing::lowess() and the smoothed points are handed to
  /// a newly allocated TransformationModelInterpolated (stored in model_).
  ///
  /// @param data_ Input data points (at least two are required)
  /// @param params Model parameters; missing entries are filled from getDefaultParameters()
  ///
  /// @throws Exception::IllegalArgument if fewer than two data points are given
  TransformationModelLowess::TransformationModelLowess(
      const TransformationModel::DataPoints& data_,
      const Param& params) : model_(0)
  {
    // take over the user parameters and complete them with the defaults
    params_ = params;
    Param default_params;
    getDefaultParameters(default_params);
    params_.setDefaults(default_params);

    if (data_.size() < 2)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                       "'lowess' model requires more data");
    }

    // work on a sorted copy, the caller's data stays untouched
    TransformationModel::DataPoints sorted(data_);
    std::sort(sorted.begin(), sorted.end(), cmpFirstDimension);

    // split the points into coordinate vectors and track the x range
    const Size n_points = sorted.size();
    std::vector<double> xs(n_points), ys(n_points), smoothed(n_points);
    double x_low = sorted[0].first;
    double x_high = x_low;
    for (Size idx = 0; idx < n_points; ++idx)
    {
      xs[idx] = sorted[idx].first;
      ys[idx] = sorted[idx].second;
      if (xs[idx] > x_high)
      {
        x_high = xs[idx];
      }
      else if (xs[idx] < x_low)
      {
        x_low = xs[idx];
      }
    }

    double lowess_span = params_.getValue("span");
    int iterations = params_.getValue("num_iterations");
    double lowess_delta = params_.getValue("delta");

    // a negative delta requests automatic selection: 1% of the x range
    if (lowess_delta < 0.0)
    {
      lowess_delta = (x_high - x_low) * 0.01;
    }

    FastLowessSmoothing::lowess(xs, ys, lowess_span, iterations, lowess_delta, smoothed);

    // pair every x with its smoothed y
    // TODO: the points could be thinned out here, a few of them may be enough for the interpolation
    TransformationModel::DataPoints smoothed_points;
    for (Size idx = 0; idx < smoothed.size(); ++idx)
    {
      smoothed_points.push_back(std::make_pair(xs[idx], smoothed[idx]));
    }

    // the interpolation model uses its own defaults, except for the
    // inter-/extrapolation types, which are taken from this model's parameters
    Param interpolation_params;
    TransformationModelInterpolated::getDefaultParameters(interpolation_params);
    interpolation_params.setValue("interpolation_type", params_.getValue("interpolation_type"));
    interpolation_params.setValue("extrapolation_type", params_.getValue("extrapolation_type"));

    // interpolate between the lowess points
    model_ = new TransformationModelInterpolated(smoothed_points, interpolation_params);
  }
  /// Tool entry point.
  ///
  /// Loads the target positions from the EDTA file given by 'pos' and, for each
  /// input file in 'in', looks up the most intense raw peak inside the RT/mz
  /// window around every position. The result is a text table stored to 'out':
  /// three header lines followed by one line per position, with five columns
  /// (RTobs, dRT, mzobs, dppm, intensity) per input file.
  ///
  /// If 'auto_rt:enabled' is set, RT peaks are picked on the smoothed TIC of the
  /// first input file and every position with a negative RT is replaced by one
  /// position per picked RT peak.
  ///
  /// @return ILLEGAL_PARAMETERS for inconsistent options, INCOMPATIBLE_INPUT_DATA
  ///         if an input file yields an empty experiment, EXECUTION_OK otherwise.
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    StringList in = getStringList_("in");
    String edta = getStringOption_("pos");
    String out = getStringOption_("out");
    String out_sep = getStringOption_("out_separator");
    String out_TIC_debug = getStringOption_("auto_rt:out_debug_TIC");

    StringList in_header = getStringList_("in_header");


    // number of out_debug_TIC files and input files must be identical
    /*if (out_TIC_debug.size() > 0 && in.size() != out_TIC_debug.size())
    {
        LOG_FATAL_ERROR << "Error: number of input file 'in' and auto_rt:out_debug_TIC files must be identical!" << std::endl;
        return ILLEGAL_PARAMETERS;
    }*/

    // number of header files and input files must be identical
    if (in_header.size() > 0 && in.size() != in_header.size())
    {
      LOG_FATAL_ERROR << "Error: number of input file 'in' and 'in_header' files must be identical!" << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    // a TIC debug file can only be written when auto-rt is active
    if (!getFlag_("auto_rt:enabled") && !out_TIC_debug.empty())
    {
      LOG_FATAL_ERROR << "Error: TIC output file requested, but auto_rt is not enabled! Either do not request the file or switch on 'auto_rt:enabled'." << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    double rttol = getDoubleOption_("rt_tol"); // full width of the RT window (half of it is applied to each side)
    double mztol = getDoubleOption_("mz_tol"); // m/z tolerance in ppm (converted to Dalton per target below)
    Size rt_collect = getIntOption_("rt_collect"); // number of spectra on each side of the apex used for the m/z average

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    MzMLFile mzml_file;
    mzml_file.setLogType(log_type_);
    MSExperiment<Peak1D> exp, exp_pp;

    EDTAFile ed;
    ConsensusMap cm;
    ed.load(edta, cm);

    StringList tf_single_header0, tf_single_header1, tf_single_header2; // header content, for each column

    std::vector<String> vec_single; // one line for each compound, multiple columns per experiment
    vec_single.resize(cm.size());
    for (Size fi = 0; fi < in.size(); ++fi)
    {
      // load raw data
      mzml_file.load(in[fi], exp);
      exp.sortSpectra(true);

      if (exp.empty())
      {
        LOG_WARN << "The given file does not contain any conventional peak data, but might"
                    " contain chromatograms. This tool currently cannot handle them, sorry." << std::endl;
        return INCOMPATIBLE_INPUT_DATA;
      }

      // try to detect RT peaks (only for the first input file -- all others should align!)
      // cm.size() might change in here...
      if (getFlag_("auto_rt:enabled") && fi == 0)
      {
        ConsensusMap cm_local = cm; // we might have different RT peaks for each map if 'auto_rt' is enabled
        cm.clear(false); // reset global list (about to be filled)

        // compute TIC
        MSChromatogram<> tic = exp.getTIC();
        MSSpectrum<> tics, tic_gf, tics_pp, tics_sn;
        for (Size ic = 0; ic < tic.size(); ++ic)
        { // rewrite Chromatogram to MSSpectrum (GaussFilter requires it); the RT is stored in the m/z slot
          Peak1D peak;
          peak.setMZ(tic[ic].getRT());
          peak.setIntensity(tic[ic].getIntensity());
          tics.push_back(peak);
        }
        // smooth (no PP_CWT here due to efficiency reasons -- large FWHM take longer!)
        double fwhm = getDoubleOption_("auto_rt:FHWM");
        GaussFilter gf;
        Param p = gf.getParameters();
        p.setValue("gaussian_width", fwhm * 2); // wider than FWHM, just to be sure we have a fully smoothed peak. Merging two peaks is unlikely
        p.setValue("use_ppm_tolerance", "false");
        gf.setParameters(p);
        tic_gf = tics;
        gf.filter(tic_gf);
        // pick peaks
        PeakPickerHiRes pp;
        p = pp.getParameters();
        p.setValue("signal_to_noise", getDoubleOption_("auto_rt:SNThreshold"));
        pp.setParameters(p);
        pp.pick(tic_gf, tics_pp);

        if (tics_pp.size())
        {
          LOG_INFO << "Found " << tics_pp.size() << " auto-rt peaks at: ";
          for (Size ipp = 0; ipp != tics_pp.size(); ++ipp) LOG_INFO << " " << tics_pp[ipp].getMZ();
        }
        else
        {
          LOG_INFO << "Found no auto-rt peaks. Change threshold parameters!";
        }
        LOG_INFO << std::endl;

        if (!out_TIC_debug.empty()) // if debug file was given
        { // store intermediate steps for debug
          MSExperiment<> out_debug;
          out_debug.addChromatogram(toChromatogram(tics));
          out_debug.addChromatogram(toChromatogram(tic_gf));

          // signal-to-noise trace of the unsmoothed TIC
          SignalToNoiseEstimatorMedian<MSSpectrum<> > snt;
          snt.init(tics);
          for (Size is = 0; is < tics.size(); ++is)
          {
            Peak1D peak;
            peak.setMZ(tic[is].getMZ());
            peak.setIntensity(snt.getSignalToNoise(tics[is]));
            tics_sn.push_back(peak);
          }
          out_debug.addChromatogram(toChromatogram(tics_sn));

          out_debug.addChromatogram(toChromatogram(tics_pp));
          // get rid of "native-id" missing warning
          // NOTE(review): only chromatograms were added to out_debug above, while this loop uses
          // out_debug.size() / out_debug[id] -- confirm that it actually reaches the added chromatograms
          for (Size id = 0; id < out_debug.size(); ++id) out_debug[id].setNativeID(String("spectrum=") + id);

          mzml_file.store(out_TIC_debug, out_debug);
          LOG_DEBUG << "Storing debug AUTO-RT: " << out_TIC_debug << std::endl;
        }

        // add target EICs: for each m/z with no/negative RT, add all combinations of that m/z with auto-RTs
        // duplicate m/z entries will be ignored!
        // all other lines with positive RT values are copied unaffected
        // do not allow duplicates
        std::set<double> mz_doubles;
        for (ConsensusMap::Iterator cit = cm_local.begin(); cit != cm_local.end(); ++cit)
        {
          if (cit->getRT() < 0)
          {
            if (mz_doubles.find(cit->getMZ()) == mz_doubles.end())
            {
              mz_doubles.insert(cit->getMZ());
            }
            else
            {
              LOG_INFO << "Found duplicate m/z entry (" << cit->getMZ() << ") for auto-rt. Skipping ..." << std::endl;
              continue;
            }

            // one copy of the target per picked TIC peak (the picked RT is stored in the peak's m/z slot)
            for (MSSpectrum<>::ConstIterator itp = tics_pp.begin(); itp != tics_pp.end(); ++itp)
            {
              ConsensusFeature f = *cit;
              f.setRT(itp->getMZ());
              cm.push_back(f);
            }

          }
          else
          { // default feature with no auto-rt
            LOG_INFO << "copying feature with RT " << cit->getRT() << std::endl;
            cm.push_back(*cit);
          }
        }

        // resize, since we have more positions now
        vec_single.resize(cm.size());
      }


      // search for each EIC and add up
      Int not_found(0);
      Map<Size, double> quant;

      String description;
      if (fi < in_header.size())
      {
        HeaderInfo info(in_header[fi]);
        description = info.header_description;
      }

      if (fi == 0)
      { // two additional columns for first file (theoretical RT and m/z)
        tf_single_header0 << "" << "";
        tf_single_header1 << "" << "";
        tf_single_header2 << "RT" << "mz";
      }

      // 5 entries for each input file
      tf_single_header0 << File::basename(in[fi]) << "" << "" << "" << "";
      tf_single_header1 << description << "" << "" << "" << "";
      tf_single_header2 << "RTobs" << "dRT" << "mzobs" << "dppm" << "intensity";

      for (Size i = 0; i < cm.size(); ++i)
      {
        //std::cerr << "Rt" << cm[i].getRT() << "  mz: " << cm[i].getMZ() << " R " <<  cm[i].getMetaValue("rank") << "\n";

        double mz_da = mztol * cm[i].getMZ() / 1e6; // mz tolerance in Dalton
        MSExperiment<>::ConstAreaIterator it = exp.areaBeginConst(cm[i].getRT() - rttol / 2,
                                                                  cm[i].getRT() + rttol / 2,
                                                                  cm[i].getMZ() - mz_da,
                                                                  cm[i].getMZ() + mz_da);
        // most intense peak in the window; stays at the target position with intensity 0 if nothing is found
        Peak2D max_peak;
        max_peak.setIntensity(0);
        max_peak.setRT(cm[i].getRT());
        max_peak.setMZ(cm[i].getMZ());
        for (; it != exp.areaEndConst(); ++it)
        {
          if (max_peak.getIntensity() < it->getIntensity())
          {
            max_peak.setIntensity(it->getIntensity());
            max_peak.setRT(it.getRT());
            max_peak.setMZ(it->getMZ());
          }
        }
        double ppm = 0; // observed m/z offset

        if (max_peak.getIntensity() == 0)
        {
          ++not_found;
        }
        else
        {
          // average (arithmetic mean) the m/z of all peaks within the m/z window,
          // collected from up to 'rt_collect' spectra on each side of the apex spectrum
          std::vector<double> mz;
          MSExperiment<>::Iterator itm = exp.RTBegin(max_peak.getRT());
          // clamp the neighbourhood to the first/last spectrum of the experiment
          SignedSize low = std::min<SignedSize>(std::distance(exp.begin(), itm), rt_collect);
          SignedSize high = std::min<SignedSize>(std::distance(itm, exp.end()) - 1, rt_collect);
          MSExperiment<>::AreaIterator itt = exp.areaBegin((itm - low)->getRT() - 0.01, (itm + high)->getRT() + 0.01, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da);
          for (; itt != exp.areaEnd(); ++itt)
          {
            mz.push_back(itt->getMZ());
            //std::cerr << "ppm: " << itt.getRT() << " " <<  itt->getMZ() << " " << itt->getIntensity() << std::endl;
          }

          // more peaks than spectra in the neighbourhood: at least one spectrum contributed several peaks
          if ((SignedSize)mz.size() > (low + high + 1)) LOG_WARN << "Compound " << i << " has overlapping peaks [" << mz.size() << "/" << low + high + 1 << "]" << std::endl;

          if (!mz.empty())
          {
            double avg_mz = std::accumulate(mz.begin(), mz.end(), 0.0) / double(mz.size());
            //std::cerr << "avg: " << avg_mz << "\n";
            ppm = (avg_mz - cm[i].getMZ()) / cm[i].getMZ() * 1e6;
          }

        }

        // appending the second column set requires separator
        String append_sep = (fi == 0 ? "" : out_sep);

        vec_single[i] += append_sep; // separates the column blocks of consecutive input files
        if (fi == 0)
        {
          vec_single[i] += String(cm[i].getRT()) + out_sep +
                           String(cm[i].getMZ()) + out_sep;
        }
        vec_single[i] += String(max_peak.getRT()) + out_sep +
                         String(max_peak.getRT() - cm[i].getRT()) + out_sep +
                         String(max_peak.getMZ()) + out_sep +
                         String(ppm)  + out_sep +
                         String(max_peak.getIntensity());
      }

      if (not_found) LOG_INFO << "Missing peaks for " << not_found << " compounds in file '" << in[fi] << "'.\n";
    }

    //-------------------------------------------------------------
    // create header
    //-------------------------------------------------------------
    // each insert goes to the front, so the final order of the header lines is 0, 1, 2
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header2, out_sep));
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header1, out_sep));
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header0, out_sep));

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    TextFile tf;
    for (std::vector<String>::iterator v_it = vec_single.begin(); v_it != vec_single.end(); ++v_it)
    {
      tf.addLine(*v_it);
    }
    tf.store(out);

    return EXECUTION_OK;
  }