void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name) { checkReferenceIds_(ref_ids); calibrated_feature_map = feature_map; // clear the ids for (Size f = 0; f < calibrated_feature_map.size(); ++f) { calibrated_feature_map[f].getPeptideIdentifications().clear(); } // map the reference ids onto the features IDMapper mapper; Param param; param.setValue("rt_tolerance", (DoubleReal)param_.getValue("rt_tolerance")); param.setValue("mz_tolerance", param_.getValue("mz_tolerance")); param.setValue("mz_measure", param_.getValue("mz_tolerance_unit")); mapper.setParameters(param); std::vector<ProteinIdentification> vec; mapper.annotate(calibrated_feature_map, ref_ids, vec); // calibrate calibrateMapGlobally(calibrated_feature_map, calibrated_feature_map, trafo_file_name); // copy the old ids calibrated_feature_map.setUnassignedPeptideIdentifications(feature_map.getUnassignedPeptideIdentifications()); for (Size f = 0; f < feature_map.size(); ++f) { calibrated_feature_map[f].getPeptideIdentifications().clear(); if (!feature_map[f].getPeptideIdentifications().empty()) { calibrated_feature_map[f].setPeptideIdentifications(feature_map[f].getPeptideIdentifications()); } } }
/**
  @brief Replaces the content of @p params with the default parameters of the B-spline model.

  Two entries are written: "num_breakpoints" (integer, minimum 2) and
  "break_positions" (restricted to "uniform" or "quantiles").

  @param params Cleared first, then filled with the defaults.
*/
void TransformationModelBSpline::getDefaultParameters(Param & params)
{
  params.clear();

  // values and descriptions
  params.setValue("num_breakpoints", 5, "Number of breakpoints of the cubic spline in the smoothing step. More breakpoints mean less smoothing. Reduce this number if the transformation has an unexpected shape.");
  params.setValue("break_positions", "uniform", "How to distribute the breakpoints on the retention time scale. 'uniform': intervals of equal size; 'quantiles': equal number of data points per interval.");

  // restrictions
  params.setMinInt("num_breakpoints", 2);
  params.setValidStrings("break_positions", StringList::create("uniform,quantiles"));
}
double SpectrumAlignmentScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const { const double tolerance = (double)param_.getValue("tolerance"); bool is_relative_tolerance = param_.getValue("is_relative_tolerance").toBool(); bool use_linear_factor = param_.getValue("use_linear_factor").toBool(); bool use_gaussian_factor = param_.getValue("use_gaussian_factor").toBool(); if (use_linear_factor && use_gaussian_factor) { cerr << "Warning: SpectrumAlignmentScore, use either 'use_linear_factor' or 'use_gaussian_factor'!" << endl; } SpectrumAlignment aligner; Param p; p.setValue("tolerance", tolerance); p.setValue("is_relative_tolerance", (String)param_.getValue("is_relative_tolerance")); aligner.setParameters(p); vector<pair<Size, Size> > alignment; aligner.getSpectrumAlignment(alignment, s1, s2); double score(0), sum(0), sum1(0), sum2(0); for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1) { sum1 += it1->getIntensity() * it1->getIntensity(); } for (PeakSpectrum::ConstIterator it1 = s2.begin(); it1 != s2.end(); ++it1) { sum2 += it1->getIntensity() * it1->getIntensity(); } for (vector<pair<Size, Size> >::const_iterator it = alignment.begin(); it != alignment.end(); ++it) { //double factor(0.0); //factor = (epsilon - fabs(s1[it->first].getPosition()[0] - s2[it->second].getPosition()[0])) / epsilon; double mz_tolerance(tolerance); if (is_relative_tolerance) { mz_tolerance = mz_tolerance * s1[it->first].getPosition()[0] / 1e6; } double mz_difference(fabs(s1[it->first].getPosition()[0] - s2[it->second].getPosition()[0])); double factor = 1.0; if (use_linear_factor || use_gaussian_factor) { factor = getFactor_(mz_tolerance, mz_difference, use_gaussian_factor); } sum += sqrt(s1[it->first].getIntensity() * s2[it->second].getIntensity() * factor); } score = sum / (sqrt(sum1 * sum2)); return score; }
/**
  @brief Replaces the content of @p params with the PeakIntegrator defaults.

  Written entries: "integration_type", "baseline_type" and "fit_EMG", each restricted
  to a fixed list of valid strings.

  @param params Cleared first, then filled with the defaults.
*/
void PeakIntegrator::getDefaultParameters(Param& params)
{
  params.clear();

  // default values with their descriptions
  params.setValue("integration_type", INTEGRATION_TYPE_INTENSITYSUM, "The integration technique to use in integratePeak() and estimateBackground() which uses either the summed intensity, integration by Simpson's rule or trapezoidal integration.");
  params.setValue("baseline_type", BASELINE_TYPE_BASETOBASE, "The baseline type to use in estimateBackground() based on the peak boundaries. A rectangular baseline shape is computed based either on the minimal intensity of the peak boundaries, the maximum intensity or the average intensity (base_to_base).");
  params.setValue("fit_EMG", "false", "Fit the chromatogram/spectrum to the EMG peak model.");

  // admissible values
  params.setValidStrings("integration_type", ListUtils::create<String>("intensity_sum,simpson,trapezoid"));
  params.setValidStrings("baseline_type", ListUtils::create<String>("base_to_base,vertical_division,vertical_division_min,vertical_division_max"));
  params.setValidStrings("fit_EMG", ListUtils::create<String>("false,true"));
}
/**
  @brief Runs DigestSimulation on every feature map of @p feature_maps.

  The digestion is configured with model "naive" and zero missed cleavages; the
  effective parameters are printed to std::cout before digestion starts.

  @param feature_maps Feature maps that are handed to DigestSimulation::digest() one by one.
*/
void digestFeaturesMapSimVector_(SimTypes::FeatureMapSimVector& feature_maps)
{
  // set up the digestion
  Param digest_settings;
  digest_settings.setValue("model", "naive");
  digest_settings.setValue("model_naive:missed_cleavages", 0);

  DigestSimulation digest_sim;
  digest_sim.setParameters(digest_settings);
  std::cout << digest_sim.getParameters() << std::endl;

  // digest each map
  SimTypes::FeatureMapSimVector::iterator current = feature_maps.begin();
  while (current != feature_maps.end())
  {
    digest_sim.digest(*current);
    ++current;
  }
}
/**
  @brief Tool entry point: trains an SVM model for the theoretical spectrum generator.

  Loads spectra (mzML) and identifications (idXML), maps the identifications onto the
  spectra with tight RT and m/z tolerances, collects one peptide sequence per spectrum
  and passes everything to SvmTheoreticalSpectrumGeneratorTrainer::trainModel().

  @return EXECUTION_OK after training was invoked.
*/
ExitCodes main_(int, const char **)
{
  // ---- parameter handling ----
  String in_spectra = getStringOption_("in_spectra");
  String in_identifications = getStringOption_("in_identifications");
  String outfile = getStringOption_("model_output_file");
  Int precursor_charge = getIntOption_("precursor_charge");

  // ---- set up the trainer ----
  Param trainer_settings = getParam_().copy("algorithm:", true);
  if (getFlag_("write_training_files"))
  {
    trainer_settings.setValue("write_training_files", String("true"));
  }
  else
  {
    trainer_settings.setValue("write_training_files", String("false"));
  }
  SvmTheoreticalSpectrumGeneratorTrainer trainer;
  trainer.setParameters(trainer_settings);

  // ---- load input ----
  PeakMap map;
  MzMLFile().load(in_spectra, map);

  std::vector<PeptideIdentification> pep_ids;
  std::vector<ProteinIdentification> prot_ids;
  String document_id;
  IdXMLFile().load(in_identifications, prot_ids, pep_ids, document_id);

  // ---- map identifications onto the spectra ----
  Param mapper_settings;
  mapper_settings.setValue("rt_tolerance", 0.001);
  mapper_settings.setValue("mz_tolerance", 0.001);
  IDMapper idmapper;
  idmapper.setParameters(mapper_settings);
  idmapper.annotate(map, pep_ids, prot_ids);

  // ---- one annotation (sequence of the first hit of the first id) per spectrum ----
  // NOTE(review): the [0] accesses assume that every spectrum received at least one
  // identification with at least one hit during mapping -- confirm that the inputs
  // guarantee this, otherwise the access is out of bounds.
  std::vector<AASequence> annotations;
  for (PeakMap::iterator spectrum = map.begin(); spectrum != map.end(); ++spectrum)
  {
    annotations.push_back(spectrum->getPeptideIdentifications()[0].getHits()[0].getSequence());
  }

  trainer.trainModel(map, annotations, outfile, precursor_charge);
  return EXECUTION_OK;
}
/**
  @brief Returns the default parameters of a subsection.

  @param section Name of the subsection.
  @return "param1"/"param2" for section "algorithm", "param3"/"param4" for any other section.
*/
Param getSubsectionDefaults_(const String & section) const
{
  Param defaults;

  // every section other than "algorithm" shares the same defaults
  if (!(section == "algorithm"))
  {
    defaults.setValue("param3", "param3_value", "param3_description");
    defaults.setValue("param4", "param4_value", "param4_description");
    return defaults;
  }

  defaults.setValue("param1", "param1_value", "param1_description");
  defaults.setValue("param2", "param2_value", "param2_description");
  return defaults;
}
/**
  @brief Smooths the intensities of a mass trace with a Savitzky-Golay filter.

  The peaks of @p mt are copied into a temporary spectrum, filtered with polynomial
  order 2 and a frame length of max(3, win_size), and the filtered intensities are
  stored via MassTrace::setSmoothedIntensities().

  Savitzky-Golay is used as an alternative to lowess smoothing (which the reference
  paper uses): judging from the unit test it gives better fits. A lowess-based variant
  (LowessSmoothing with "window_size" = win_size on the RT/intensity pairs) was tried
  here before and is no longer active.

  @param mt Mass trace to smooth; receives the smoothed intensities.
  @param win_size Requested filter frame length; values below 3 are raised to 3.
*/
void ElutionPeakDetection::smoothData(MassTrace& mt, int win_size) const
{
  // copy the trace into a spectrum the filter can work on
  MSSpectrum<PeakType> spectrum;
  spectrum.insert(spectrum.begin(), mt.begin(), mt.end());

  // frame length must be at least polynomial_order+1, otherwise SG will fail
  Param sg_settings;
  sg_settings.setValue("polynomial_order", 2);
  sg_settings.setValue("frame_length", std::max(3, win_size));

  SavitzkyGolayFilter sg;
  sg.setParameters(sg_settings);
  sg.filter(spectrum);

  // collect the filtered intensities in trace order
  std::vector<double> smoothed_intensities;
  for (MSSpectrum<PeakType>::iterator peak = spectrum.begin(); peak != spectrum.end(); ++peak)
  {
    smoothed_intensities.push_back(peak->getIntensity());
  }
  mt.setSmoothedIntensities(smoothed_intensities);
}
/**
  @brief Builds the default system parameters.

  @return A Param with the entries "version" (current VersionInfo version), "home_dir",
          "temp_dir", "id_db_dir" and "threads".
*/
Param File::getSystemParameterDefaults_()
{
  // description of "id_db_dir", assembled from adjacent literals
  const String id_db_description(
    "Default directory for FASTA and psq files used as databased for id engines. "
    "This allows you to specify just the filename of the DB in the "
    "respective TOPP tool, and the database will be searched in the directories specified here "
    "");

  Param defaults;
  defaults.setValue("version", VersionInfo::getVersion());
  // the following three are only active when the user enters something in the value
  defaults.setValue("home_dir", "");
  defaults.setValue("temp_dir", "");
  defaults.setValue("id_db_dir", ListUtils::create<String>(""), id_db_description);
  defaults.setValue("threads", 1);
  // TODO: maybe we add -log, -debug.... or....
  return defaults;
}
/**
  @brief Returns the parameters of the simulation including those of all registered labelers.

  The result contains this object's own parameters, the entry "Labeling:type"
  (restricted to the products registered for BaseLabeler) and, for each registered
  labeler, its default parameters below "Labeling:<name>:".

  @return The combined parameter set.
  @throws Exception::InvalidValue if the factory returns no labeler for a registered name.
*/
Param MSSim::getParameters() const
{
  // non-labeling options
  Param tmp;
  tmp.insert("", this->param_);

  // labeling type selector
  vector<String> products = Factory<BaseLabeler>::registeredProducts();
  tmp.setValue("Labeling:type", "labelfree", "Select the labeling type you want for your experiment");
  tmp.setValidStrings("Labeling:type", products);

  // defaults of every registered labeler
  for (vector<String>::iterator name_it = products.begin(); name_it != products.end(); ++name_it)
  {
    BaseLabeler* labeler = Factory<BaseLabeler>::create(*name_it);
    if (!labeler)
    {
      throw Exception::InvalidValue(__FILE__, __LINE__, __PRETTY_FUNCTION__, "This labeler returned by the Factory is invalid!", name_it->c_str());
    }

    tmp.insert("Labeling:" + *name_it + ":", labeler->getDefaultParameters());
    // a labeler without parameters creates no section, and describing a
    // non-existing section would fail
    if (!tmp.copy("Labeling:" + *name_it).empty())
    {
      tmp.setSectionDescription("Labeling:" + *name_it, labeler->getDescription());
    }
    delete(labeler);
  }

  return tmp;
}
int main(int argc, const char** argv) { if (argc < 2) return 1; // the path to the data should be given on the command line String tutorial_data_path(argv[1]); PeakMap exp_raw; PeakMap exp_picked; MzMLFile mzml_file; mzml_file.load(tutorial_data_path + "/data/Tutorial_PeakPickerCWT.mzML", exp_raw); PeakPickerCWT pp; Param param; param.setValue("peak_width", 0.1); pp.setParameters(param); pp.pickExperiment(exp_raw, exp_picked); exp_picked.updateRanges(); cout << "\nMinimal fwhm of a mass spectrometric peak: " << (DoubleReal)param.getValue("peak_width") << "\n\nNumber of picked peaks " << exp_picked.getSize() << std::endl; return 0; } //end of main
// for SWATH -- get the theoretical b and y series masses for a sequence void getBYSeries(AASequence& a, // std::vector<double>& bseries, // std::vector<double>& yseries, // UInt charge // ) { OPENMS_PRECONDITION(charge > 0, "Charge is a positive integer"); TheoreticalSpectrumGenerator generator; Param p; p.setValue("add_metainfo", "true", "Adds the type of peaks as metainfo to the peaks, like y8+, [M-H2O+2H]++"); generator.setParameters(p); RichPeakSpectrum rich_spec; generator.addPeaks(rich_spec, a, Residue::BIon, charge); generator.addPeaks(rich_spec, a, Residue::YIon, charge); for (RichPeakSpectrum::iterator it = rich_spec.begin(); it != rich_spec.end(); ++it) { if (it->getMetaValue("IonName").toString()[0] == 'y') { yseries.push_back(it->getMZ()); } else if (it->getMetaValue("IonName").toString()[0] == 'b') { bseries.push_back(it->getMZ()); } } } // end getBYSeries
/**
  @brief Replaces the content of @p params with the default parameters of the linear model.

  The only entry is "symmetric_regression" ("true" or "false", default "false").

  @param params Cleared first, then filled with the defaults.
*/
void TransformationModelLinear::getDefaultParameters(Param& params)
{
  params.clear();

  params.setValue("symmetric_regression",
                  "false",
                  "Perform linear regression on 'y - x' vs. 'y + x', instead of on 'y' vs. 'x'.");
  params.setValidStrings("symmetric_regression",
                         ListUtils::create<String>("true,false"));
}
/**
  @brief Replaces the content of @p params with the default parameters of the interpolated model.

  The only entry is "interpolation_type" (default "cspline"), restricted to
  "linear", "polynomial", "cspline" and "akima".

  @param params Cleared first, then filled with the defaults.
*/
void TransformationModelInterpolated::getDefaultParameters(Param & params)
{
  params.clear();
  params.setValue("interpolation_type", "cspline", "Type of interpolation to apply.");
  params.setValidStrings("interpolation_type", StringList::create("linear,polynomial,cspline,akima"));
}
/**
  @brief Returns the subsection defaults of the iTRAQ analyzer.

  The section name is ignored. The result holds the parameters of an
  ItraqChannelExtractor below "Extraction:", those of an ItraqQuantifier below
  "Quantification:" and the advanced entry "MetaInformation:Program".
*/
Param getSubsectionDefaults_(const String & /*section*/) const
{
  // the iTRAQ type is irrelevant here - the ini is the same
  const Param extraction_defaults = ItraqChannelExtractor(ItraqQuantifier::FOURPLEX).getParameters();
  const Param quantification_defaults = ItraqQuantifier(ItraqQuantifier::FOURPLEX).getParameters();

  Param defaults;
  defaults.insert("Extraction:", extraction_defaults);
  defaults.insert("Quantification:", quantification_defaults);
  defaults.setValue("MetaInformation:Program", "OpenMS::ITRAQAnalyzer", "", StringList::create("advanced"));
  return defaults;
}
/**
  @brief Returns the subsection defaults of the TMT analyzer.

  The section name is ignored. The result holds the parameters of an
  ItraqChannelExtractor below "Extraction:", those of an ItraqQuantifier below
  "Quantification:" (both created for TMT_SIXPLEX) and the advanced entry
  "MetaInformation:Program".
*/
Param getSubsectionDefaults_(const String & /*section*/) const
{
  const Param extraction_defaults = ItraqChannelExtractor(ItraqQuantifier::TMT_SIXPLEX).getParameters();
  const Param quantification_defaults = ItraqQuantifier(ItraqQuantifier::TMT_SIXPLEX).getParameters();

  Param defaults;
  defaults.insert("Extraction:", extraction_defaults);
  defaults.insert("Quantification:", quantification_defaults);
  defaults.setValue("MetaInformation:Program", "OpenMS::TMTAnalyzer", "", ListUtils::create<String>("advanced"));
  return defaults;
}
/**
  @brief Fits a one-dimensional model to the given raw data.

  The bounding box of the positions in @p set is determined and enlarged by the
  standard deviation (scaled by tolerance_stdev_box_). For charge 0 a "GaussModel" is
  created and parameterized with bounding box and statistics; otherwise an
  "IsotopeModel" is created from the "isotope_model:" parameters plus mean, charge,
  isotope standard deviation and maximum isotope. Finally the offset is fitted.

  @param set Raw data points; the first element is read unconditionally.
  @param model Receives the newly created model (created through the factory).
  @return The quality reported by fitOffset_(), or -1.0 if that value is NaN.
*/
IsotopeFitter1D::QualityType IsotopeFitter1D::fit1d(const RawDataArrayType& set, InterpolationModel*& model)
{
  // bounding box of all positions
  min_ = max_ = set[0].getPos();
  for (UInt i = 1; i < set.size(); ++i)
  {
    const CoordinateType position = set[i].getPos();
    if (min_ > position)
    {
      min_ = position;
    }
    if (max_ < position)
    {
      max_ = position;
    }
  }

  // enlarge the bounding box by a few multiples of the standard deviation
  stdev1_ = sqrt(statistics_.variance()) * tolerance_stdev_box_;
  min_ -= stdev1_;
  max_ += stdev1_;

  // build the model: isotope pattern for charged features, plain Gaussian otherwise
  if (charge_ != 0)
  {
    model = static_cast<InterpolationModel*>(Factory<BaseModel<1> >::create("IsotopeModel"));

    Param iso_param = this->param_.copy("isotope_model:", true);
    iso_param.removeAll("stdev");
    model->setParameters(iso_param);
    model->setInterpolationStep(interpolation_step_);

    Param model_settings;
    model_settings.setValue("statistics:mean", statistics_.mean());
    model_settings.setValue("charge", static_cast<Int>(charge_));
    model_settings.setValue("isotope:mode:GaussianSD", isotope_stdev_);
    model_settings.setValue("isotope:maximum", max_isotope_);
    model->setParameters(model_settings);

    IsotopeModel* isotope_model = static_cast<IsotopeModel*>(model);
    isotope_model->setSamples(isotope_model->getFormula());
  }
  else
  {
    model = static_cast<InterpolationModel*>(Factory<BaseModel<1> >::create("GaussModel"));
    model->setInterpolationStep(interpolation_step_);

    Param model_settings;
    model_settings.setValue("bounding_box:min", min_);
    model_settings.setValue("bounding_box:max", max_);
    model_settings.setValue("statistics:variance", statistics_.variance());
    model_settings.setValue("statistics:mean", statistics_.mean());
    model->setParameters(model_settings);
  }

  // fit offset
  QualityType quality = fitOffset_(model, set, stdev1_, stdev1_, interpolation_step_);
  if (boost::math::isnan(quality))
  {
    quality = -1.0;
  }
  return quality;
}
/**
  @brief Returns the system parameters stored in '.OpenMS/OpenMS.ini' of the user's home directory.

  - If the file is not readable, the defaults are returned and, where possible, the
    folder and the file are created with these defaults.
  - If the file exists, it is loaded. When its "version" entry is missing or differs
    from the current version, the stored values are merged into the current defaults,
    the merged set is written back and returned, so that the caller sees exactly what
    is stored on disk (including entries that only exist in the new defaults).

  @return The effective system parameters.
*/
Param File::getSystemParameters()
{
  const String dirname = String(QDir::homePath()) + "/.OpenMS";
  const String filename = dirname + "/OpenMS.ini";

  Param p;
  if (!File::readable(filename)) // create file
  {
    p = getSystemParameterDefaults_();

    QDir dir(dirname.toQString());
    if (!dir.exists())
    {
      if (!File::writable(dirname))
      {
        LOG_WARN << "Warning: Cannot create folder '.OpenMS' in user home directory. Please check your environment!" << std::endl;
        LOG_WARN << " Home directory determined is: " << QDir::homePath().toStdString() << "." << std::endl;
        return p;
      }
      dir.mkpath(".");
    }

    if (!File::writable(filename))
    {
      LOG_WARN << "Warning: Cannot create '.OpenMS/OpenMS.ini' in user home directory. Please check your environment!" << std::endl;
      LOG_WARN << " Home directory determined is: " << QDir::homePath().toStdString() << "." << std::endl;
      return p;
    }

    ParamXMLFile paramFile;
    paramFile.store(filename, p);
  }
  else
  {
    ParamXMLFile paramFile;
    paramFile.load(filename, p);

    // check version
    const bool has_version = p.exists("version");
    if (!has_version || (p.getValue("version") != VersionInfo::getVersion()))
    {
      if (!has_version)
      {
        LOG_WARN << "Broken file '" << filename << "' discovered. The 'version' tag is missing." << std::endl;
      }
      else // old version
      {
        LOG_WARN << "File '" << filename << "' is deprecated." << std::endl;
      }
      LOG_WARN << "Updating missing/wrong entries in '" << filename << "' with defaults!" << std::endl;

      Param p_new = getSystemParameterDefaults_();
      // update old version, such that p_new:version does not get overwritten during update()
      p.setValue("version", VersionInfo::getVersion());
      p_new.update(p);
      paramFile.store(filename, p_new);

      // hand out the merged parameters, i.e. the same content that was just stored;
      // returning the stale 'p' would hide entries that are new in the defaults
      p = p_new;
    }
  }
  return p;
}
/**
  @brief Replaces the content of @p params with the default parameters of the lowess model.

  Written entries: "span" (float within [0, 1]), "num_iterations" (integer >= 0),
  "delta", "interpolation_type" and "extrapolation_type" (the last two restricted to
  fixed lists of strings).

  @param params Cleared first, then filled with the defaults.
*/
void TransformationModelLowess::getDefaultParameters(Param& params)
{
  params.clear();

  // values and descriptions
  params.setValue("span", 2/3.0, "Fraction of datapoints (f) to use for each local regression (determines the amount of smoothing). Choosing this parameter in the range .2 to .8 usually results in a good fit.");
  params.setValue("num_iterations", 3, "Number of rubstifying iterations for lowess fitting.");
  params.setValue("delta", -1.0, "Nonnegative parameter which may be used to save computations (recommended value is 0.01 of the range of the input, e.g. for data ranging from 1000 seconds to 2000 seconds, it could be set to 10). Setting a negative value will automatically do this.");
  params.setValue("interpolation_type", "cspline", "Method to use for interpolation between datapoints computed by lowess. 'linear': Linear interpolation. 'cspline': Use the cubic spline for interpolation. 'akima': Use an akima spline for interpolation");
  params.setValue("extrapolation_type", "four-point-linear", "Method to use for extrapolation outside the data range. 'two-point-linear': Uses a line through the first and last point to extrapolate. 'four-point-linear': Uses a line through the first and second point to extrapolate in front and and a line through the last and second-to-last point in the end. 'global-linear': Uses a linear regression to fit a line through all data points and use it for interpolation.");

  // numeric limits
  params.setMinFloat("span", 0.0);
  params.setMaxFloat("span", 1.0);
  params.setMinInt("num_iterations", 0);

  // admissible strings
  params.setValidStrings("interpolation_type", ListUtils::create<String>("linear,cspline,akima"));
  params.setValidStrings("extrapolation_type", ListUtils::create<String>("two-point-linear,four-point-linear,global-linear"));
}
/*! Converts this \l{RuleAction} to a normal \l{Action}.
 *
 *  The \l{Action} is constructed from the action type id and the device id of this
 *  \l{RuleAction}. The name and value of every \l{RuleActionParam} are copied into a
 *  plain \l{Param} that is collected in a \l{ParamList}.
 *
 *  \sa Action
 */
Action RuleAction::toAction() const
{
    Action action(m_actionTypeId, m_deviceId);
    ParamList params;
    // copy name and value of each rule action parameter into a plain Param
    foreach (const RuleActionParam &ruleActionParam, m_ruleActionParams) {
        Param param;
        param.setName(ruleActionParam.name());
        param.setValue(ruleActionParam.value());
        params.append(param);
    }
/*
 * Executes a SET PARAM / RESET PARAM command against the parameter list of
 * the given sqlci environment.
 *
 *  - argument length -1 : the named param is set to the null value; it is
 *                         created and appended first if it does not exist.
 *  - no argument        : RESET PARAM; without a name all params are removed
 *                         and deleted, otherwise only the named one.
 *  - otherwise          : SET PARAM; an existing param gets the new value, a
 *                         missing one is created and appended.
 *
 * Always returns 0.
 */
short SetParam::process(SqlciEnv * sqlci_env)
{
  /* set param to null value */
  if (get_arglen() == -1)
    {
      Param * param = sqlci_env->get_paramlist()->get(param_name);
      if (!param)
        {
          param = new Param(param_name, (char*)0);
          sqlci_env->get_paramlist()->append(param);
        }
      param->makeNull();
      return 0;
    }

  /* RESET PARAM command */
  if (!get_argument())
    {
      if (param_name)
        {
          /* RESET the named param only */
          Param * param = sqlci_env->get_paramlist()->get(param_name);
          sqlci_env->get_paramlist()->remove(param_name);
          delete param;
          return 0;
        }

      /* RESET all params */
      /* NOTE(review): the list is modified while it is being traversed;
         presumably getNext() copes with the removal of the current
         entry -- verify against the list implementation. */
      for (Param * param = sqlci_env->get_paramlist()->getFirst();
           param;
           param = sqlci_env->get_paramlist()->getNext())
        {
          sqlci_env->get_paramlist()->remove(param->getName());
          delete param;
        }
      return 0;
    }

  /* SET PARAM command */
  Param * param = sqlci_env->get_paramlist()->get(param_name);
  if (!param)
    {
      param = new Param(param_name, this);
      sqlci_env->get_paramlist()->append(param);
    }
  else
    {
      param->setValue(this);
    }
  return 0;
}
// Test of ConsensusIDAlgorithmAverage::apply(): runs the algorithm on a copy of the
// shared test identifications ('ids', defined outside of this section) and checks
// rank, sequence and score of each of the expected seven consensus hits. Finally
// checks that inconsistent score orientations are rejected.
START_SECTION(void apply(std::vector<PeptideIdentification>& ids))
{
  TOLERANCE_ABSOLUTE(0.01)

  ConsensusIDAlgorithmAverage consensus;
  // define parameters:
  Param param;
  param.setValue("filter:considered_hits", 5);
  consensus.setParameters(param);
  // apply (on a copy, 'ids' itself is used again at the end of this section):
  vector<PeptideIdentification> f = ids;
  consensus.apply(f);

  // a single identification holding all seven hits is expected
  TEST_EQUAL(f.size(), 1);
  hits = f[0].getHits();
  TEST_EQUAL(hits.size(), 7);

  TEST_EQUAL(hits[0].getRank(), 1);
  TEST_EQUAL(hits[0].getSequence(), AASequence::fromString("F"));
  TEST_REAL_SIMILAR(hits[0].getScore(), 0.0);

  // the two "0.2" scores are not equal (due to floating-point number effects),
  // therefore the ranks of the hits differ:
  TEST_EQUAL(hits[1].getScore() < hits[2].getScore(), true);
  TEST_EQUAL(hits[1].getRank(), 2);
  TEST_EQUAL(hits[1].getSequence(), AASequence::fromString("C"));
  TEST_REAL_SIMILAR(hits[1].getScore(), 0.2);

  TEST_EQUAL(hits[2].getRank(), 3);
  TEST_EQUAL(hits[2].getSequence(), AASequence::fromString("G"));
  TEST_REAL_SIMILAR(hits[2].getScore(), 0.2);

  TEST_EQUAL(hits[3].getRank(), 4);
  TEST_EQUAL(hits[3].getSequence(), AASequence::fromString("A"));
  TEST_REAL_SIMILAR(hits[3].getScore(), 0.25);

  TEST_EQUAL(hits[4].getRank(), 5);
  TEST_EQUAL(hits[4].getSequence(), AASequence::fromString("D"));
  TEST_REAL_SIMILAR(hits[4].getScore(), 0.35);

  TEST_EQUAL(hits[5].getRank(), 6);
  TEST_EQUAL(hits[5].getSequence(), AASequence::fromString("B"));
  TEST_REAL_SIMILAR(hits[5].getScore(), 0.4);

  TEST_EQUAL(hits[6].getRank(), 7);
  TEST_EQUAL(hits[6].getSequence(), AASequence::fromString("E"));
  TEST_REAL_SIMILAR(hits[6].getScore(), 0.5);

  // after flipping the score orientation of one input, apply() is expected to throw
  ids[2].setHigherScoreBetter(true);
  TEST_EXCEPTION(Exception::InvalidValue, consensus.apply(ids));
}
/**
  @brief Fits a bi-Gaussian model to the given raw data.

  The bounding box of the positions in @p set is determined and enlarged on the left by
  the first and on the right by the second standard deviation (each scaled by
  tolerance_stdev_box_). A "BiGaussModel" is created, parameterized with bounding box,
  mean and both variances, and its offset is fitted.

  @param set Raw data points; the first element is read unconditionally.
  @param model Receives the newly created model (created through the factory).
  @return The quality reported by fitOffset_(), or -1.0 if that value is NaN.
*/
BiGaussFitter1D::QualityType BiGaussFitter1D::fit1d(const RawDataArrayType& set, InterpolationModel*& model)
{
  // bounding box of all positions
  CoordinateType lower = set[0].getPos();
  CoordinateType upper = set[0].getPos();
  for (UInt i = 1; i < set.size(); ++i)
  {
    const CoordinateType position = set[i].getPos();
    if (lower > position)
    {
      lower = position;
    }
    if (upper < position)
    {
      upper = position;
    }
  }

  // enlarge the bounding box by a few multiples of the standard deviation
  const CoordinateType stdev_left = sqrt(statistics1_.variance()) * tolerance_stdev_box_;
  const CoordinateType stdev_right = sqrt(statistics2_.variance()) * tolerance_stdev_box_;
  lower -= stdev_left;
  upper += stdev_right;

  // build model
  Param model_settings;
  model_settings.setValue("bounding_box:min", lower);
  model_settings.setValue("bounding_box:max", upper);
  model_settings.setValue("statistics:mean", statistics1_.mean());
  model_settings.setValue("statistics:variance1", statistics1_.variance());
  model_settings.setValue("statistics:variance2", statistics2_.variance());

  model = static_cast<InterpolationModel*>(Factory<BaseModel<1> >::create("BiGaussModel"));
  model->setInterpolationStep(interpolation_step_);
  model->setParameters(model_settings);

  // fit offset
  QualityType quality = fitOffset_(model, set, stdev_left, stdev_right, interpolation_step_);
  if (boost::math::isnan(quality))
  {
    quality = -1.0;
  }
  return quality;
}
// Test of ConsensusIDAlgorithmRanks::apply(): runs the algorithm on a copy of the
// shared test identifications ('ids', defined outside of this section) and checks
// rank, sequence and score of each of the expected seven consensus hits.
START_SECTION(void apply(std::vector<PeptideIdentification>& ids))
{
  TOLERANCE_ABSOLUTE(0.01)

  ConsensusIDAlgorithmRanks consensus;
  // define parameters:
  Param param;
  param.setValue("filter:considered_hits", 5);
  consensus.setParameters(param);
  // apply (on a copy of the shared input):
  vector<PeptideIdentification> f = ids;
  consensus.apply(f);

  // a single identification holding all seven hits is expected
  TEST_EQUAL(f.size(), 1);
  hits = f[0].getHits();
  TEST_EQUAL(hits.size(), 7);

  TEST_EQUAL(hits[0].getRank(), 1);
  TEST_EQUAL(hits[0].getSequence(), AASequence::fromString("C"));
  TEST_REAL_SIMILAR(hits[0].getScore(), 0.8);

  TEST_EQUAL(hits[1].getRank(), 2);
  TEST_EQUAL(hits[1].getSequence(), AASequence::fromString("A"));
  TEST_REAL_SIMILAR(hits[1].getScore(), 0.6);

  TEST_EQUAL(hits[2].getRank(), 3);
  TEST_EQUAL(hits[2].getSequence(), AASequence::fromString("B"));
  TEST_REAL_SIMILAR(hits[2].getScore(), 0.5333);

  TEST_EQUAL(hits[3].getRank(), 4);
  TEST_EQUAL(hits[3].getSequence(), AASequence::fromString("F"));
  TEST_REAL_SIMILAR(hits[3].getScore(), 0.33333);

  TEST_EQUAL(hits[4].getRank(), 5);
  TEST_EQUAL(hits[4].getSequence(), AASequence::fromString("D"));
  TEST_REAL_SIMILAR(hits[4].getScore(), 0.26666);

  TEST_EQUAL(hits[5].getRank(), 6);
  TEST_EQUAL(hits[5].getSequence(), AASequence::fromString("G"));
  TEST_REAL_SIMILAR(hits[5].getScore(), 0.2);

  TEST_EQUAL(hits[6].getRank(), 7);
  TEST_EQUAL(hits[6].getSequence(), AASequence::fromString("E"));
  TEST_REAL_SIMILAR(hits[6].getScore(), 0.06666);
}
// for SWATH -- get the theoretical b and y series masses for a sequence void getTheorMasses(AASequence& a, std::vector<double>& masses, UInt charge) { OPENMS_PRECONDITION(charge > 0, "Charge is a positive integer"); TheoreticalSpectrumGenerator generator; Param p; p.setValue("add_metainfo", "true", "Adds the type of peaks as metainfo to the peaks, like y8+, [M-H2O+2H]++"); generator.setParameters(p); RichPeakSpectrum rich_spec; generator.addPeaks(rich_spec, a, Residue::BIon, charge); generator.addPeaks(rich_spec, a, Residue::YIon, charge); generator.addPrecursorPeaks(rich_spec, a, charge); for (RichPeakSpectrum::iterator it = rich_spec.begin(); it != rich_spec.end(); ++it) { masses.push_back(it->getMZ()); } } // end getBYSeries
/**
  @brief Replaces the content of @p params with the default parameters of the linear model.

  Written entries: "symmetric_regression", the weighting options "x_weight" and
  "y_weight" (empty by default) and the limits "x_datum_min", "x_datum_max",
  "y_datum_min" and "y_datum_max".

  @param params Cleared first, then filled with the defaults.
*/
void TransformationModelLinear::getDefaultParameters(Param& params)
{
  params.clear();

  // values and descriptions
  params.setValue("symmetric_regression", "false", "Perform linear regression on 'y - x' vs. 'y + x', instead of on 'y' vs. 'x'.");
  params.setValue("x_weight", "", "Weight x values");
  params.setValue("y_weight", "", "Weight y values");
  params.setValue("x_datum_min", 1e-15, "Minimum x value");
  params.setValue("x_datum_max", 1e15, "Maximum x value");
  params.setValue("y_datum_min", 1e-15, "Minimum y value");
  params.setValue("y_datum_max", 1e15, "Maximum y value");

  // admissible strings (the trailing comma admits the empty string)
  params.setValidStrings("symmetric_regression", ListUtils::create<String>("true,false"));
  params.setValidStrings("x_weight", ListUtils::create<String>("1/x,1/x2,ln(x),"));
  params.setValidStrings("y_weight", ListUtils::create<String>("1/y,1/y2,ln(y),"));
}
/*
 * Executes a SET PATTERN / RESET PATTERN command against the pattern list of
 * the given sqlci environment.
 *
 *  - no argument : RESET PATTERN; without a name all patterns are removed and
 *                  deleted, otherwise only the named one.
 *  - otherwise   : SET PATTERN; an existing pattern gets the new value (using
 *                  the terminal charset), a missing one is created and
 *                  appended.
 *
 * Always returns 0.
 */
short SetPattern::process(SqlciEnv * sqlci_env)
{
  /* SET PATTERN command */
  if (get_argument())
    {
      Param * pattern = sqlci_env->get_patternlist()->get(pattern_name);
      if (!pattern)
        {
          pattern = new Param(pattern_name, get_argument());
          sqlci_env->get_patternlist()->append(pattern);
        }
      else
        {
          pattern->setValue(get_argument(), sqlci_env->getTerminalCharset());
        }
      return 0;
    }

  /* RESET PATTERN command */
  if (pattern_name)
    {
      /* RESET the named pattern only */
      Param * pattern = sqlci_env->get_patternlist()->get(pattern_name);
      sqlci_env->get_patternlist()->remove(pattern_name);
      delete pattern;
      return 0;
    }

  /* RESET all patterns */
  /* NOTE(review): the list is modified while it is being traversed;
     presumably getNext() copes with the removal of the current entry --
     verify against the list implementation. */
  for (Param * pattern = sqlci_env->get_patternlist()->getFirst();
       pattern;
       pattern = sqlci_env->get_patternlist()->getNext())
    {
      sqlci_env->get_patternlist()->remove(pattern->getName());
      delete pattern;
    }
  return 0;
}
int main(int argc, const char** argv) { if (argc < 2) return 1; // the path to the data should be given on the command line String tutorial_data_path(argv[1]); TOFCalibration ec; PeakMap exp_raw, calib_exp; MzMLFile mzml_file; mzml_file.load(tutorial_data_path + "/data/Tutorial_TOFCalibration_peak.mzML", calib_exp); mzml_file.load(tutorial_data_path + "/data/Tutorial_TOFCalibration_raw.mzML", exp_raw); vector<double> ref_masses; TextFile ref_file; ref_file.load(tutorial_data_path + "/data/Tutorial_TOFCalibration_masses.txt", true); for (TextFile::ConstIterator iter = ref_file.begin(); iter != ref_file.end(); ++iter) { ref_masses.push_back(String(iter->c_str()).toDouble()); } std::vector<double> ml1; ml1.push_back(418327.924993827); std::vector<double> ml2; ml2.push_back(253.645187196031); std::vector<double> ml3; ml3.push_back(-0.0414243465397252); ec.setML1s(ml1); ec.setML2s(ml2); ec.setML3s(ml3); Param param; param.setValue("PeakPicker:peak_width", 0.1); ec.setParameters(param); ec.pickAndCalibrate(calib_exp, exp_raw, ref_masses); return 0; } //end of main
/**
  @brief Builds the model: lowess-smooths the data and wraps the smoothed
         points in an interpolated transformation model.

  The input points are copied, sorted with cmpFirstDimension and smoothed
  with FastLowessSmoothing::lowess using the "span", "num_iterations" and
  "delta" parameters. A negative "delta" is replaced by 1% of the x range of
  the data. The smoothed points are then passed to a newly allocated
  TransformationModelInterpolated, configured with this model's
  "interpolation_type" and "extrapolation_type".

  @param data_   Data points to fit (at least two are required)
  @param params  Model parameters; missing entries are filled from the defaults

  @throws Exception::IllegalArgument if fewer than two data points are given
*/
TransformationModelLowess::TransformationModelLowess(
  const TransformationModel::DataPoints& data_, const Param& params) :
  model_(0)
{
  // parameter handling/checking: user values first, then fill in defaults
  Param default_params;
  params_ = params;
  getDefaultParameters(default_params);
  params_.setDefaults(default_params);

  if (data_.size() < 2)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "'lowess' model requires more data");
  }

  // work on a sorted copy of the input (TODO: avoid the copy)
  TransformationModel::DataPoints sorted_points(data_);
  std::sort(sorted_points.begin(), sorted_points.end(), cmpFirstDimension);

  // split the points into coordinate vectors and track the x range
  const Size n_points = sorted_points.size();
  std::vector<double> x(n_points);
  std::vector<double> y(n_points);
  std::vector<double> smoothed(n_points);

  double x_lowest = sorted_points[0].first;
  double x_highest = x_lowest;
  for (Size idx = 0; idx < n_points; ++idx)
  {
    const double current_x = sorted_points[idx].first;
    x[idx] = current_x;
    y[idx] = sorted_points[idx].second;

    if (current_x < x_lowest)
    {
      x_lowest = current_x;
    }
    else if (current_x > x_highest)
    {
      x_highest = current_x;
    }
  }

  // smoothing parameters
  double span = params_.getValue("span");
  int nsteps = params_.getValue("num_iterations");
  double delta = params_.getValue("delta");
  if (delta < 0.0)
  {
    // automatically determine delta: 1% of the covered x range
    delta = (x_highest - x_lowest) * 0.01;
  }

  FastLowessSmoothing::lowess(x, y, span, nsteps, delta, smoothed);

  // pair every x with its smoothed value
  // TODO thin out data here ? a few data points may be enough to interpolate
  TransformationModel::DataPoints smoothed_points;
  for (Size idx = 0; idx < smoothed.size(); ++idx)
  {
    smoothed_points.push_back(std::make_pair(x[idx], smoothed[idx]));
  }

  // forward the inter-/extrapolation settings to the interpolation model
  Param interpolation_params;
  TransformationModelInterpolated::getDefaultParameters(interpolation_params);
  interpolation_params.setValue("interpolation_type", params_.getValue("interpolation_type"));
  interpolation_params.setValue("extrapolation_type", params_.getValue("extrapolation_type"));

  // create new interpolation model based on the lowess data
  model_ = new TransformationModelInterpolated(smoothed_points, interpolation_params);
}
/**
  @brief Tool entry point: extracts one intensity maximum per target compound
         from each input file and writes the results as a separated text table.

  Steps, as implemented below:
   -# read options, reject inconsistent combinations (ILLEGAL_PARAMETERS)
   -# load the target positions (RT, m/z) from the 'pos' file
   -# for each input file: load and sort the spectra; for the first file and
      with 'auto_rt:enabled', replace targets that have a negative RT by one
      target per RT peak picked from the smoothed TIC
   -# for each target, find the most intense peak inside the RT / m/z window
      and append observed RT, RT difference, observed m/z, ppm offset and
      intensity to that target's output line
   -# prepend three header lines and store the table in 'out'

  @return ILLEGAL_PARAMETERS on inconsistent options, INCOMPATIBLE_INPUT_DATA
          if an input file yields an empty experiment, EXECUTION_OK otherwise
*/
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList in = getStringList_("in");
  String edta = getStringOption_("pos");
  String out = getStringOption_("out");
  String out_sep = getStringOption_("out_separator");
  String out_TIC_debug = getStringOption_("auto_rt:out_debug_TIC");

  StringList in_header = getStringList_("in_header");

  // number of out_debug_TIC files and input files must be identical
  // (check disabled: out_TIC_debug is a single file name here)
  /*if (out_TIC_debug.size() > 0 && in.size() != out_TIC_debug.size())
  {
      LOG_FATAL_ERROR << "Error: number of input file 'in' and auto_rt:out_debug_TIC files must be identical!" << std::endl;
      return ILLEGAL_PARAMETERS;
  }*/

  // number of header files and input files must be identical
  // (only enforced if header files were given at all)
  if (in_header.size() > 0 && in.size() != in_header.size())
  {
    LOG_FATAL_ERROR << "Error: number of input file 'in' and 'in_header' files must be identical!" << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  // the TIC debug output is only produced inside the auto_rt branch below
  if (!getFlag_("auto_rt:enabled") && !out_TIC_debug.empty())
  {
    LOG_FATAL_ERROR << "Error: TIC output file requested, but auto_rt is not enabled! Either do not request the file or switch on 'auto_rt:enabled'." << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  double rttol = getDoubleOption_("rt_tol"); // full RT window width (rttol / 2 is used on each side)
  double mztol = getDoubleOption_("mz_tol"); // relative m/z tolerance, converted via 1e6 (ppm) below
  Size rt_collect = getIntOption_("rt_collect"); // max. number of spectra taken on each side of the apex for the m/z estimate

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  MSExperiment<Peak1D> exp, exp_pp;

  // target positions
  EDTAFile ed;
  ConsensusMap cm;
  ed.load(edta, cm);

  StringList tf_single_header0, tf_single_header1, tf_single_header2; // header content, for each column

  std::vector<String> vec_single; // one line for each compound, multiple columns per experiment
  vec_single.resize(cm.size());
  for (Size fi = 0; fi < in.size(); ++fi)
  {
    // load raw data
    mzml_file.load(in[fi], exp);
    exp.sortSpectra(true);

    if (exp.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry." << std::endl;
      return INCOMPATIBLE_INPUT_DATA;
    }

    // try to detect RT peaks (only for the first input file -- all others should align!)
    // cm.size() might change in here...
    if (getFlag_("auto_rt:enabled") && fi == 0)
    {
      ConsensusMap cm_local = cm; // we might have different RT peaks for each map if 'auto_rt' is enabled
      cm.clear(false); // reset global list (about to be filled)

      // compute TIC
      MSChromatogram<> tic = exp.getTIC();
      MSSpectrum<> tics, tic_gf, tics_pp, tics_sn;
      for (Size ic = 0; ic < tic.size(); ++ic)
      { // rewrite Chromatogram to MSSpectrum (GaussFilter requires it);
        // note: the RT value is stored in the m/z field of the peak
        Peak1D peak;
        peak.setMZ(tic[ic].getRT());
        peak.setIntensity(tic[ic].getIntensity());
        tics.push_back(peak);
      }
      // smooth (no PP_CWT here due to efficiency reasons -- large FWHM take longer!)
      double fwhm = getDoubleOption_("auto_rt:FHWM");
      GaussFilter gf;
      Param p = gf.getParameters();
      p.setValue("gaussian_width", fwhm * 2); // wider than FWHM, just to be sure we have a fully smoothed peak. Merging two peaks is unlikely
      p.setValue("use_ppm_tolerance", "false");
      gf.setParameters(p);
      tic_gf = tics;
      gf.filter(tic_gf);
      // pick peaks on the smoothed TIC ('p' is reused for the picker's parameters)
      PeakPickerHiRes pp;
      p = pp.getParameters();
      p.setValue("signal_to_noise", getDoubleOption_("auto_rt:SNThreshold"));
      pp.setParameters(p);
      pp.pick(tic_gf, tics_pp);

      // report the picked RT positions (held in the m/z field, see above)
      if (tics_pp.size())
      {
        LOG_INFO << "Found " << tics_pp.size() << " auto-rt peaks at: ";
        for (Size ipp = 0; ipp != tics_pp.size(); ++ipp) LOG_INFO << " " << tics_pp[ipp].getMZ();
      }
      else
      {
        LOG_INFO << "Found no auto-rt peaks. Change threshold parameters!";
      }
      LOG_INFO << std::endl;

      if (!out_TIC_debug.empty()) // if debug file was given
      { // store intermediate steps for debug:
        // raw TIC, smoothed TIC, signal-to-noise trace, picked peaks (in this order)
        MSExperiment<> out_debug;
        out_debug.addChromatogram(toChromatogram(tics));
        out_debug.addChromatogram(toChromatogram(tic_gf));

        SignalToNoiseEstimatorMedian<MSSpectrum<> > snt;
        snt.init(tics);
        for (Size is = 0; is < tics.size(); ++is)
        {
          Peak1D peak;
          peak.setMZ(tic[is].getMZ());
          peak.setIntensity(snt.getSignalToNoise(tics[is]));
          tics_sn.push_back(peak);
        }
        out_debug.addChromatogram(toChromatogram(tics_sn));

        out_debug.addChromatogram(toChromatogram(tics_pp));
        // get rid of "native-id" missing warning
        for (Size id = 0; id < out_debug.size(); ++id) out_debug[id].setNativeID(String("spectrum=") + id);

        mzml_file.store(out_TIC_debug, out_debug);
        LOG_DEBUG << "Storing debug AUTO-RT: " << out_TIC_debug << std::endl;
      }

      // add target EICs: for each m/z with no/negative RT, add all combinations of that m/z with auto-RTs
      // duplicate m/z entries will be ignored!
      // all other lines with non-negative RT values are copied unaffected
      // do not allow duplicates: remembers the m/z values already expanded
      std::set<double> mz_doubles;
      for (ConsensusMap::Iterator cit = cm_local.begin(); cit != cm_local.end(); ++cit)
      {
        if (cit->getRT() < 0)
        {
          if (mz_doubles.find(cit->getMZ()) == mz_doubles.end())
          {
            mz_doubles.insert(cit->getMZ());
          }
          else
          {
            LOG_INFO << "Found duplicate m/z entry (" << cit->getMZ() << ") for auto-rt. Skipping ..." << std::endl;
            continue;
          }

          ConsensusMap cm_RT_multiplex; // NOTE(review): never used below
          for (MSSpectrum<>::ConstIterator itp = tics_pp.begin(); itp != tics_pp.end(); ++itp)
          {
            // one copy of the target per picked TIC peak, with the RT taken from that peak
            ConsensusFeature f = *cit;
            f.setRT(itp->getMZ());
            cm.push_back(f);
          }

        }
        else
        { // default feature with no auto-rt
          LOG_INFO << "copying feature with RT " << cit->getRT() << std::endl;
          cm.push_back(*cit);
        }
      }

      // resize, since the number of target positions may have changed
      vec_single.resize(cm.size());
    }

    // search for each EIC and add up
    Int not_found(0); // number of targets without any peak of intensity > 0 in this file
    Map<Size, double> quant; // NOTE(review): never used below

    // optional per-file description for the second header line
    String description;
    if (fi < in_header.size())
    {
      HeaderInfo info(in_header[fi]);
      description = info.header_description;
    }

    if (fi == 0)
    { // two additional columns for first file (theoretical RT and m/z)
      tf_single_header0 << "" << "";
      tf_single_header1 << "" << "";
      tf_single_header2 << "RT" << "mz";
    }

    // 5 entries for each input file
    tf_single_header0 << File::basename(in[fi]) << "" << "" << "" << "";
    tf_single_header1 << description << "" << "" << "" << "";
    tf_single_header2 << "RTobs" << "dRT" << "mzobs" << "dppm" << "intensity";

    for (Size i = 0; i < cm.size(); ++i)
    {
      //std::cerr << "Rt" << cm[i].getRT() << " mz: " << cm[i].getMZ() << " R " << cm[i].getMetaValue("rank") << "\n";

      double mz_da = mztol * cm[i].getMZ() / 1e6; // mz tolerance in Dalton
      // window: target RT +/- rttol / 2, target m/z +/- mz_da
      MSExperiment<>::ConstAreaIterator it = exp.areaBeginConst(cm[i].getRT() - rttol / 2,
                                                                cm[i].getRT() + rttol / 2,
                                                                cm[i].getMZ() - mz_da,
                                                                cm[i].getMZ() + mz_da);
      // start with a zero-intensity placeholder at the target position;
      // it is reported unchanged if the window holds no peak with intensity > 0
      Peak2D max_peak;
      max_peak.setIntensity(0);
      max_peak.setRT(cm[i].getRT());
      max_peak.setMZ(cm[i].getMZ());
      for (; it != exp.areaEndConst(); ++it)
      {
        if (max_peak.getIntensity() < it->getIntensity())
        {
          max_peak.setIntensity(it->getIntensity());
          max_peak.setRT(it.getRT());
          max_peak.setMZ(it->getMZ());
        }
      }
      double ppm = 0; // observed m/z offset

      if (max_peak.getIntensity() == 0)
      {
        ++not_found;
      }
      else
      {
        // average (arithmetic mean) the m/z values of all peaks inside the
        // m/z window, taken from up to rt_collect spectra on each side of
        // the apex spectrum
        std::vector<double> mz;
        MSExperiment<>::Iterator itm = exp.RTBegin(max_peak.getRT());
        // clamp the number of neighbouring spectra to what is available
        SignedSize low = std::min<SignedSize>(std::distance(exp.begin(), itm), rt_collect);
        SignedSize high = std::min<SignedSize>(std::distance(itm, exp.end()) - 1, rt_collect);
        MSExperiment<>::AreaIterator itt = exp.areaBegin((itm - low)->getRT() - 0.01, (itm + high)->getRT() + 0.01, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da);
        for (; itt != exp.areaEnd(); ++itt)
        {
          mz.push_back(itt->getMZ());
          //std::cerr << "ppm: " << itt.getRT() << " " <<  itt->getMZ() << " " << itt->getIntensity() << std::endl;
        }

        // more peaks than spectra in the RT range: at least one spectrum has several peaks in the m/z window
        if ((SignedSize)mz.size() > (low + high + 1)) LOG_WARN << "Compound " << i << " has overlapping peaks [" << mz.size() << "/" << low + high + 1 << "]" << std::endl;

        if (!mz.empty())
        {
          double avg_mz = std::accumulate(mz.begin(), mz.end(), 0.0) / double(mz.size());
          //std::cerr << "avg: " << avg_mz << "\n";
          ppm = (avg_mz - cm[i].getMZ()) / cm[i].getMZ() * 1e6;
        }

      }

      // appending the second (and any later) column set requires a leading separator
      String append_sep = (fi == 0 ? "" : out_sep);

      vec_single[i] += append_sep;
      if (fi == 0)
      { // first file: start the line with the target RT and m/z
        vec_single[i] += String(cm[i].getRT()) + out_sep +
                         String(cm[i].getMZ()) + out_sep;
      }
      // observed RT, RT difference, observed m/z, ppm offset, intensity
      vec_single[i] += String(max_peak.getRT()) + out_sep +
                       String(max_peak.getRT() - cm[i].getRT()) + out_sep +
                       String(max_peak.getMZ()) + out_sep +
                       String(ppm)  + out_sep +
                       String(max_peak.getIntensity());
    }

    if (not_found) LOG_INFO << "Missing peaks for " << not_found << " compounds in file '" << in[fi] << "'.\n";
  }

  //-------------------------------------------------------------
  // create header
  //-------------------------------------------------------------
  // each line is inserted at the front, so the final order is header0, header1, header2
  vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header2, out_sep));
  vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header1, out_sep));
  vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header0, out_sep));

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  TextFile tf;
  for (std::vector<String>::iterator v_it = vec_single.begin(); v_it != vec_single.end(); ++v_it)
  {
    tf.addLine(*v_it);
  }
  tf.store(out);

  return EXECUTION_OK;
}