void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name) { checkReferenceIds_(ref_ids); calibrated_feature_map = feature_map; // clear the ids for (Size f = 0; f < calibrated_feature_map.size(); ++f) { calibrated_feature_map[f].getPeptideIdentifications().clear(); } // map the reference ids onto the features IDMapper mapper; Param param; param.setValue("rt_tolerance", (DoubleReal)param_.getValue("rt_tolerance")); param.setValue("mz_tolerance", param_.getValue("mz_tolerance")); param.setValue("mz_measure", param_.getValue("mz_tolerance_unit")); mapper.setParameters(param); std::vector<ProteinIdentification> vec; mapper.annotate(calibrated_feature_map, ref_ids, vec); // calibrate calibrateMapGlobally(calibrated_feature_map, calibrated_feature_map, trafo_file_name); // copy the old ids calibrated_feature_map.setUnassignedPeptideIdentifications(feature_map.getUnassignedPeptideIdentifications()); for (Size f = 0; f < feature_map.size(); ++f) { calibrated_feature_map[f].getPeptideIdentifications().clear(); if (!feature_map[f].getPeptideIdentifications().empty()) { calibrated_feature_map[f].setPeptideIdentifications(feature_map[f].getPeptideIdentifications()); } } }
void InternalCalibration::applyTransformation_(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map) { calibrated_feature_map = feature_map; for (Size f = 0; f < feature_map.size(); ++f) { DoubleReal mz = feature_map[f].getMZ(); mz = trafo_.apply(mz); calibrated_feature_map[f].setMZ(mz); // apply transformation to convex hulls and subordinates for (Size s = 0; s < calibrated_feature_map[f].getSubordinates().size(); ++s) { // subordinates DoubleReal mz = calibrated_feature_map[f].getSubordinates()[s].getMZ(); mz = trafo_.apply(mz); calibrated_feature_map[f].getSubordinates()[s].setMZ(mz); } for (Size s = 0; s < calibrated_feature_map[f].getConvexHulls().size(); ++s) { // convex hulls std::vector<DPosition<2> > point_vec = calibrated_feature_map[f].getConvexHulls()[s].getHullPoints(); calibrated_feature_map[f].getConvexHulls()[s].clear(); for (Size p = 0; p < point_vec.size(); ++p) { DoubleReal mz = point_vec[p][1]; mz = trafo_.apply(mz); point_vec[p][1] = mz; } calibrated_feature_map[f].getConvexHulls()[s].setHullPoints(point_vec); } } }
void DeepPyramid::processFeatureMap(int filterIdx, const FeatureMap &map, vector<BoundingBox> &detectedObjects) const { Size mapSize = map.size(); Size filterSize = rootFilter[filterIdx]->getMapSize(); cout << "size: "<<map.size()<<endl; for (int width = 0; width < mapSize.width-filterSize.width; width+=stride) { for (int height = 0; height < mapSize.height-filterSize.height; height+=stride) { FeatureMap extractedMap; map.extractFeatureMap(Rect(Point(width, height), filterSize), extractedMap); if (rootFilter[filterIdx]->predict(extractedMap) == OBJECT) { BoundingBox box; box.norm5Box = Rect(Point(width, height), filterSize); box.confidence = std::fabs(rootFilter[filterIdx]->predict(extractedMap, true)); box.map = extractedMap; detectedObjects.push_back(box); } } } }
/**
  @brief Keeps only the features whose meta value @p fdr_name is strictly below @p cutoff.

  @param feature_map Map to filter; replaced by the filtered map.
  @param cutoff Exclusive upper bound for the meta value.
  @param fdr_name Name of the meta value that is read from every feature and converted to double.
*/
void applyFDRcutoff(FeatureMap & feature_map, double cutoff, String fdr_name)
{
  // start from a copy and empty it via clear(false)
  // (presumably this keeps the map-level meta data - verify against FeatureMap::clear)
  FeatureMap filtered = feature_map;
  filtered.clear(false);

  for (Size idx = 0; idx < feature_map.size(); ++idx)
  {
    const double fdr = static_cast<double>(feature_map[idx].getMetaValue(fdr_name));
    if (!(fdr < cutoff))
    {
      continue;
    }
    filtered.push_back(feature_map[idx]);
  }

  feature_map = filtered;
}
void EDTAFile::store(const String& filename, const FeatureMap& map) const { TextFile tf; tf.addLine("RT\tm/z\tintensity\tcharge"); for (Size i = 0; i < map.size(); ++i) { const Feature& f = map[i]; tf.addLine(String(f.getRT()) + "\t" + f.getMZ() + "\t" + f.getIntensity() + "\t" + f.getCharge()); } tf.store(filename); }
void InternalCalibration::checkReferenceIds_(const FeatureMap<> & feature_map) { Size num_ids = 0; for (Size f = 0; f < feature_map.size(); ++f) { if (!feature_map[f].getPeptideIdentifications().empty() && feature_map[f].getPeptideIdentifications()[0].getHits().size() > 1) { throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains PeptideIdentifications with more than one hit, use the IDFilter to select only the best hits before you map the ids to the feature map."); } else if (!feature_map[f].getPeptideIdentifications().empty()) ++num_ids; } if (num_ids < 2) { throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains less than two PeptideIdentifications, can't perform a linear regression on your data."); } }
/**
  @brief Computes summary statistics over the features with indices [@p begin, @p end) of @p map.

  The result holds the summed intensity (tic) followed by the output of
  SomeStatistics for, in this order: intensity, m/z, width, overall quality,
  RT quality and m/z quality. Space for 43 values is reserved
  (6 seven-number statistics + tic).

  @param map Feature map to read from.
  @param begin Index of the first feature of the slice.
  @param end Index one past the last feature of the slice.

  @return The statistics described above; a vector of 43 zeros if the slice is
          empty (@p end <= @p begin) or @p end exceeds the map size.
*/
vector<double> sliceStatistics(const FeatureMap& map, Size begin, Size end) const
{
  // Empty or out-of-range slice: return a zero-filled vector of the regular length.
  if (end <= begin || end > map.size())
  {
    return vector<double>(43);
  }

  const Size slice_size = end - begin;
  vector<double> intensities(slice_size);
  vector<double> peak_widths(slice_size);
  vector<double> mz(slice_size);
  vector<double> overall_qualities(slice_size);
  vector<double> mz_qualities(slice_size);
  vector<double> rt_qualities(slice_size);

  double tic = 0.0;
  for (Size offset = 0; offset < slice_size; ++offset)
  {
    const Feature& feature = map[begin + offset];
    intensities[offset] = feature.getIntensity();
    mz[offset] = feature.getMZ();
    peak_widths[offset] = feature.getWidth();
    rt_qualities[offset] = feature.getQuality(Feature::RT);
    mz_qualities[offset] = feature.getQuality(Feature::MZ);
    overall_qualities[offset] = feature.getOverallQuality();
    tic += feature.getIntensity();
  }

  SomeStatistics some_statistics;
  vector<double> results;
  results.reserve(43); // 6 7-number stats + tic
  results.push_back(tic);
  results << some_statistics(intensities);
  results << some_statistics(mz);
  results << some_statistics(peak_widths);
  results << some_statistics(overall_qualities);
  results << some_statistics(rt_qualities);
  results << some_statistics(mz_qualities);

  return results;
}
/// Counts the number of features with meta value @p name equal to @p value UInt count(const FeatureMap& map, const String& name, const String& value = "") { UInt count = 0; for (Size i = 0; i < map.size(); ++i) { if (map[i].metaValueExists(name)) { if (value == "") { ++count; } else { if (map[i].getMetaValue(name).toString() == value) { ++count; } } } } return count; }
void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, String trafo_file_name) { // check if the ids checkReferenceIds_(feature_map); // first collect theoretical and observed m/z values std::vector<DoubleReal> observed_masses; std::vector<DoubleReal> theoretical_masses; for (Size f = 0; f < feature_map.size(); ++f) { // if more than one peptide id exists for this feature we can't use it as reference if (feature_map[f].getPeptideIdentifications().size() > 1) continue; if (!feature_map[f].getPeptideIdentifications().empty()) { Int charge = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge(); DoubleReal theo_mass = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, charge) / (DoubleReal)charge; theoretical_masses.push_back(theo_mass); observed_masses.push_back(feature_map[f].getMZ()); #ifdef DEBUG_CALIBRATION std::cout << feature_map[f].getRT() << " " << feature_map[f].getMZ() << " " << theo_mass << std::endl; std::cout << feature_map[f].getPeptideIdentifications()[0].getHits().size() << std::endl; std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence() << std::endl; std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge() << std::endl; #endif } } // then make the linear regression makeLinearRegression_(observed_masses, theoretical_masses); // apply transformation applyTransformation_(feature_map, calibrated_feature_map); if (trafo_file_name != "") { TransformationXMLFile().store(trafo_file_name, trafo_); } }
FeatureMap<> output; //parameters Param param; ParamXMLFile paramFile; paramFile.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderAlgorithmPicked.ini"), param); param = param.copy("FeatureFinder:1:algorithm:",true); //Dummy featurefinder FeatureFinder ff; FFPP ffpp; ffpp.setParameters(param); ffpp.setData(input, output, ff); ffpp.run(); TEST_EQUAL(output.size(),8); TOLERANCE_ABSOLUTE(0.001); TEST_REAL_SIMILAR(output[0].getOverallQuality(),0.8819); TEST_REAL_SIMILAR(output[1].getOverallQuality(),0.8673); TEST_REAL_SIMILAR(output[2].getOverallQuality(),0.9079); TEST_REAL_SIMILAR(output[3].getOverallQuality(),0.9271); TEST_REAL_SIMILAR(output[4].getOverallQuality(),0.9401); TEST_REAL_SIMILAR(output[5].getOverallQuality(),0.9094); TEST_REAL_SIMILAR(output[6].getOverallQuality(),0.9403); TEST_REAL_SIMILAR(output[7].getOverallQuality(),0.9243); TOLERANCE_ABSOLUTE(20.0); TEST_REAL_SIMILAR(output[0].getIntensity(),51260.0); TEST_REAL_SIMILAR(output[1].getIntensity(),44667.3); TEST_REAL_SIMILAR(output[2].getIntensity(),34613.3);
void FeatureFinder::run(const String& algorithm_name, PeakMap& input_map, FeatureMap& features, const Param& param, const FeatureMap& seeds) { // Nothing to do if there is no data if ((algorithm_name != "mrm" && input_map.empty()) || (algorithm_name == "mrm" && input_map.getChromatograms().empty())) { features.clear(true); return; } // check input { // We need updated ranges => check number of peaks if (algorithm_name != "mrm" && input_map.getSize() == 0) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder needs updated ranges on input map. Aborting."); } // We need MS1 data only => check levels if (algorithm_name != "mrm" && (input_map.getMSLevels().size() != 1 || input_map.getMSLevels()[0] != 1)) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data. Aborting."); } //Check if the peaks are sorted according to m/z if (!input_map.isSorted(true)) { LOG_WARN << "Input map is not sorted by RT and m/z! This is done now, before applying the algorithm!" << std::endl; input_map.sortSpectra(true); input_map.sortChromatograms(true); } for (Size s = 0; s < input_map.size(); ++s) { if (input_map[s].empty()) continue; if (input_map[s][0].getMZ() < 0) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on spectra that contain peaks with positive m/z values. Filter the data accordingly beforehand! 
Aborting."); } } } // initialize if (algorithm_name != "mrm" && algorithm_name != "centroided") { // Resize peak flag vector flags_.resize(input_map.size()); for (Size i = 0; i < input_map.size(); ++i) { flags_[i].assign(input_map[i].size(), UNUSED); } } // do the work if (algorithm_name != "none") { FeatureFinderAlgorithm* algorithm = Factory<FeatureFinderAlgorithm>::create(algorithm_name); algorithm->setParameters(param); algorithm->setData(input_map, features, *this); algorithm->setSeeds(seeds); algorithm->run(); delete(algorithm); } if (algorithm_name != "mrm") // mrm works on chromatograms; the next section is only for conventional data { //report RT apex spectrum index and native ID for each feature for (Size i = 0; i < features.size(); ++i) { //index Size spectrum_index = input_map.RTBegin(features[i].getRT()) - input_map.begin(); features[i].setMetaValue("spectrum_index", spectrum_index); //native id if (spectrum_index < input_map.size()) { String native_id = input_map[spectrum_index].getNativeID(); features[i].setMetaValue("spectrum_native_id", native_id); } else { /// @todo that happens sometimes using IsotopeWaveletFeatureFinder (Rene, Marc, Andreas, Clemens) std::cerr << "FeatureFinderAlgorithm_impl, line=" << __LINE__ << "; FixMe this cannot be, but happens" << std::endl; } } } }
std::vector<PeptideIdentification> pep_ids; String document_id; IdXMLFile file; file.load(OPENMS_GET_TEST_DATA_PATH("PrecursorIonSelection_ids.idXML"),prot_ids,pep_ids, document_id); FeatureMap features,next_features; FeatureXMLFile f_file; f_file.load(OPENMS_GET_TEST_DATA_PATH("PrecursorIonSelection_features.featureXML"),features); START_SECTION(void sortByTotalScore(FeatureMap& features)) ptr->sortByTotalScore(features); TEST_REAL_SIMILAR((double)features[0].getMetaValue("msms_score"),49485.75) END_SECTION START_SECTION(void getNextPrecursors(FeatureMap& features,FeatureMap& next_features,UInt number)) ptr->getNextPrecursors(features,next_features,2); TEST_EQUAL(next_features.size(),2) TEST_REAL_SIMILAR((double)next_features[0].getMetaValue("msms_score"),49485.75) TEST_REAL_SIMILAR((double)next_features[1].getMetaValue("msms_score"),47365) END_SECTION PrecursorIonSelectionPreprocessing preprocessing; Param param; param.setValue("precursor_mass_tolerance",0.05); param.setValue("precursor_mass_tolerance_unit","Da"); param.setValue("missed_cleavages",1); param.setValue("preprocessed_db_path",OPENMS_GET_TEST_DATA_PATH("")); preprocessing.setParameters(param); preprocessing.dbPreprocessing(OPENMS_GET_TEST_DATA_PATH("PrecursorIonSelection_db.fasta"),false); Param param2; param2.setValue("Preprocessing:precursor_mass_tolerance",0.05); param2.setValue("Preprocessing:precursor_mass_tolerance_unit","Da");
void processInput(const char * filename, FeatureMap & feature_map) { FeatureMap out_feature_map = feature_map; std::map<String, int> added_already; out_feature_map.clear(false); std::map<String, Feature*> feature_map_ref; //for (FeatureMap::iterator feature = feature_map.begin(); feature != feature_map.end(); feature++) for (Size i = 0; i < feature_map.size(); i++) { feature_map_ref[feature_map[i].getUniqueId()] = &feature_map[i]; } std::ifstream data(filename); std::string line; // Read header std::getline(data, line); // std::map<int, String> header_dict; // not used std::map<String, int> header_dict_inv; { std::stringstream lineStream(line); std::string cell; int cnt = 0; while (std::getline(lineStream,cell,'\t')) { //header_dict[cnt] = cell; header_dict_inv[cell] = cnt; cnt++; } } if (header_dict_inv.find("id") == header_dict_inv.end() || header_dict_inv.find("m_score") == header_dict_inv.end() || header_dict_inv.find("d_score") == header_dict_inv.end() ) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: The tsv file is expected to have at least the following headers: id, m_score, d_score. 
" ); } // Read file std::vector<std::string> current_row; std::string cell; int line_nr = 0; double m_score, d_score; while (std::getline(data, line)) { line_nr++; current_row.clear(); std::stringstream lineStream(line); while (std::getline(lineStream,cell,'\t')) { current_row.push_back(cell); } String id = current_row[header_dict_inv["id"]]; id = id.substitute("f_", ""); try { m_score = ((String)current_row[header_dict_inv["m_score"]]).toDouble(); } catch (char* /*str*/) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Could not convert String" + ((String)current_row[header_dict_inv["m_score"]]) + " on line " + String(line_nr)); } try { d_score = ((String)current_row[header_dict_inv["d_score"]]).toDouble(); } catch (char* /*str*/) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Could not convert String" + ((String)current_row[header_dict_inv["d_score"]]) + " on line " + String(line_nr)); } if (feature_map_ref.find(id) != feature_map_ref.end() ) { Feature* feature = feature_map_ref.find(id)->second; feature->setMetaValue("m_score", m_score); feature->setMetaValue("d_score", d_score); // we are not allowed to have duplicate unique ids if (added_already.find(id) != added_already.end()) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Duplicate id found in CSV file: " + id ); } out_feature_map.push_back(*feature); } } feature_map = out_feature_map; }
FileHandler a; TEST_EQUAL(a.getOptions().hasMSLevels(), false) END_SECTION START_SECTION((PeakFileOptions & getOptions())) FileHandler a; a.getOptions().addMSLevel(1); TEST_EQUAL(a.getOptions().hasMSLevels(), true); END_SECTION START_SECTION((template <class FeatureType> bool loadFeatures(const String &filename, FeatureMap<FeatureType>&map, FileTypes::Type force_type = FileTypes::UNKNOWN))) FileHandler tmp; FeatureMap map; TEST_EQUAL(tmp.loadFeatures("test.bla", map), false) TEST_EQUAL(tmp.loadFeatures(OPENMS_GET_TEST_DATA_PATH("FeatureXMLFile_2_options.featureXML"), map), true) TEST_EQUAL(map.size(), 7); TEST_EQUAL(tmp.loadFeatures(OPENMS_GET_TEST_DATA_PATH("FeatureXMLFile_2_options.featureXML"), map), true) TEST_EQUAL(map.size(), 7); END_SECTION START_SECTION((template <class PeakType> void storeExperiment(const String &filename, const MSExperiment<PeakType>&exp, ProgressLogger::LogType log = ProgressLogger::NONE))) FileHandler fh; PeakMap exp; fh.loadExperiment(OPENMS_GET_TEST_DATA_PATH("MzMLFile_1.mzML"), exp); //test mzML String filename; NEW_TMP_FILE(filename) fh.storeExperiment(filename, exp); TEST_EQUAL(fh.getTypeByContent(filename), FileTypes::MZML)
/**
  @brief Writes information and statistics about the input feature or consensus map to @p os.

  The input file is taken from the "in" option; its type from "in_type" or,
  if that is unknown, from FileHandler::getType(). Depending on the flags
  the following sections are written:
  - "m": file name, file type and document id
  - "p": data processing steps (numbered from 0)
  - "s": number of elements, ranges, and charge distribution (features) or
         size distribution and file descriptions (consensus features)

  Independent of the flags, a final section is written: for feature maps a
  tab-separated table with statistics for each of "n" retention time
  slices, for consensus maps several summary statistics.

  @param os Stream that receives the output.

  @return PARSE_ERROR if the input file type cannot be determined, EXECUTION_OK otherwise.
*/
ExitCodes outputTo(ostream& os)
{
  //-------------------------------------------------------------
  // Parameter handling
  //-------------------------------------------------------------

  // File names
  String in = getStringOption_("in");

  // File type
  FileHandler fh;
  FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

  if (in_type == FileTypes::UNKNOWN)
  {
    in_type = fh.getType(in);
    writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
  }

  if (in_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine input file type!");
    return PARSE_ERROR;
  }

  FeatureMap feat;
  ConsensusMap cons;

  if (in_type == FileTypes::FEATUREXML) //features
  {
    FeatureXMLFile().load(in, feat);
    feat.updateRanges();
  }
  else if (in_type == FileTypes::CONSENSUSXML) //consensus features
  {
    ConsensusXMLFile().load(in, cons);
    cons.updateRanges();
  }

  //-------------------------------------------------------------
  // meta information
  //-------------------------------------------------------------
  if (getFlag_("m"))
  {
    os << endl
       << "-- General information --" << endl
       << endl
       << "file name: " << in << endl
       << "file type: " << FileTypes::typeToName(in_type) << endl;

    //basic info
    os << endl
       << "-- Meta information --" << endl
       << endl;

    if (in_type == FileTypes::FEATUREXML) //features
    {
      os << "Document id : " << feat.getIdentifier() << endl << endl;
    }
    else if (in_type == FileTypes::CONSENSUSXML) //consensus features
    {
      os << "Document id : " << cons.getIdentifier() << endl << endl;
    }
  }

  //-------------------------------------------------------------
  // data processing
  //-------------------------------------------------------------
  if (getFlag_("p"))
  {
    //basic info
    os << endl
       << "-- Data processing information --" << endl
       << endl;

    //get data processing info
    vector<DataProcessing> dp;
    if (in_type == FileTypes::FEATUREXML) //features
    {
      dp = feat.getDataProcessing();
    }
    else if (in_type == FileTypes::CONSENSUSXML) //consensus features
    {
      dp = cons.getDataProcessing();
    }

    int i = 0;
    for (vector<DataProcessing>::iterator it = dp.begin(); it != dp.end(); ++it)
    {
      os << "Data processing " << i << endl;
      os << "\tcompletion_time: " << (*it).getCompletionTime().getDate() << 'T' << (*it).getCompletionTime().getTime() << endl;
      os << "\tsoftware name: " << (*it).getSoftware().getName() << " version " << (*it).getSoftware().getVersion() << endl;
      for (set<DataProcessing::ProcessingAction>::const_iterator paIt = (*it).getProcessingActions().begin(); paIt != (*it).getProcessingActions().end(); ++paIt)
      {
        os << "\t\tprocessing action: " << DataProcessing::NamesOfProcessingAction[*paIt] << endl;
      }
      // advance the running number once per data processing entry
      ++i;
    }
  }

  //-------------------------------------------------------------
  // statistics
  //-------------------------------------------------------------
  if (getFlag_("s"))
  {
    //-------------------------------------------------------------
    // Content statistics
    //-------------------------------------------------------------
    if (in_type == FileTypes::FEATUREXML) //features
    {
      os << "Number of features: " << feat.size() << endl
         << endl
         << "Ranges:" << endl
         << " retention time: " << String::number(feat.getMin()[Peak2D::RT], 2) << " : " << String::number(feat.getMax()[Peak2D::RT], 2) << endl
         << " mass-to-charge: " << String::number(feat.getMin()[Peak2D::MZ], 2) << " : " << String::number(feat.getMax()[Peak2D::MZ], 2) << endl
         << " intensity: " << String::number(feat.getMinInt(), 2) << " : " << String::number(feat.getMaxInt(), 2) << endl
         << endl;

      // Charge distribution
      Map<UInt, UInt> charges;
      for (Size i = 0; i < feat.size(); ++i)
      {
        charges[feat[i].getCharge()]++;
      }

      os << "Charge distribution" << endl;
      for (Map<UInt, UInt>::const_iterator it = charges.begin(); it != charges.end(); ++it)
      {
        os << "charge " << it->first << ": " << it->second << endl;
      }
    }
    else if (in_type == FileTypes::CONSENSUSXML) //consensus features
    {
      map<Size, UInt> num_consfeat_of_size;
      for (ConsensusMap::const_iterator cmit = cons.begin(); cmit != cons.end(); ++cmit)
      {
        ++num_consfeat_of_size[cmit->size()];
      }

      os << endl
         << "Number of consensus features:" << endl;
      for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
      {
        os << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
      }
      os << " total: " << setw(6) << cons.size() << endl
         << endl;

      os << "Ranges:" << endl
         << " retention time: " << String::number(cons.getMin()[Peak2D::RT], 2) << " : " << String::number(cons.getMax()[Peak2D::RT], 2) << endl
         << " mass-to-charge: " << String::number(cons.getMin()[Peak2D::MZ], 2) << " : " << String::number(cons.getMax()[Peak2D::MZ], 2) << endl
         << " intensity: " << String::number(cons.getMinInt(), 2) << " : " << String::number(cons.getMaxInt(), 2) << endl;

      // file descriptions
      const ConsensusMap::FileDescriptions& descs = cons.getFileDescriptions();
      if (!descs.empty())
      {
        os << endl
           << "File descriptions:" << endl;
        for (ConsensusMap::FileDescriptions::const_iterator it = descs.begin(); it != descs.end(); ++it)
        {
          os << " - " << it->second.filename << endl
             << " identifier: " << it->first << endl
             << " label : " << it->second.label << endl
             << " size : " << it->second.size << endl;
        }
      }
    }

    os << endl
       << "-- Summary Statistics --" << endl
       << endl;
  }

  if (in_type == FileTypes::FEATUREXML) //features
  {
    feat.sortByRT();

    vector<double> slice_stats;
    Size n = getIntOption_("n");

    Size begin = 0;
    Size end = 0;
    os << "#slice\tRT_begin\tRT_end\tnumber_of_features\ttic\t"
       << "int_mean\tint_stddev\tint_min\tint_max\tint_median\tint_lowerq\tint_upperq\t"
       << "mz_mean\tmz_stddev\tmz_min\tmz_max\tmz_median\tmz_lowerq\tmz_upperq\t"
       << "width_mean\twidth_stddev\twidth_min\twidth_max\twidth_median\twidth_lowerq\twidth_upperq\t"
       << "qual_mean\tqual_stddev\tqual_min\tqual_max\tqual_median\tqual_lowerq\tqual_upperq\t"
       << "rt_qual_mean\trt_qual_stddev\trt_qual_min\trt_qual_max\trt_qual_median\trt_qual_lowerq\trt_qual_upperq\t"
       << "mz_qual_mean\tmz_qual_stddev\tmz_qual_min\tmz_qual_max\tmz_qual_median\tmz_qual_lowerq\tmz_qual_upperq"
       << endl;

    // RT of the last feature after sorting; an empty map has no last element, so fall back to 0
    const double max_rt = feat.empty() ? 0.0 : feat.back().getRT();

    double rt_begin = 0.0;
    for (Size slice = 0; slice < n; ++slice)
    {
      // Determine slice boundaries.
      double rt_end = max_rt / (double)n * (slice + 1);
      for (end = begin; end < feat.size() && feat[end].getRT() < rt_end; ++end) {}

      // Compute statistics on all features in this slice.
      slice_stats = sliceStatistics(feat, begin, end);

      // Write the beginning and end of the slices to the output as well as the slice index.
      os << slice << "\t" << rt_begin << "\t" << rt_end << "\t" << end - begin << "\t";

      // Write the statistics as a line of an csv file
      copy(slice_stats.begin(), slice_stats.end(), ostream_iterator<double>(os, "\t"));
      os << endl;

      begin = end;
      rt_begin = rt_end;
    }
  }
  else if (in_type == FileTypes::CONSENSUSXML) //consensus features
  {
    Size size = cons.size();

    // These vectors are filled with push_back, so they must start empty;
    // reserve() only pre-allocates the storage.
    vector<double> intensities;
    intensities.reserve(size);
    vector<double> qualities;
    qualities.reserve(size);

    vector<double> rt_delta_by_elems;
    vector<double> rt_aad_by_elems;
    vector<double> rt_aad_by_cfs;
    rt_aad_by_cfs.reserve(size);

    vector<double> mz_delta_by_elems;
    vector<double> mz_aad_by_elems;
    vector<double> mz_aad_by_cfs;
    mz_aad_by_cfs.reserve(size);

    vector<double> it_delta_by_elems;
    vector<double> it_aad_by_elems;
    vector<double> it_aad_by_cfs;
    it_aad_by_cfs.reserve(size);

    for (ConsensusMap::const_iterator cm_iter = cons.begin(); cm_iter != cons.end(); ++cm_iter)
    {
      double rt_aad = 0;
      double mz_aad = 0;
      double it_aad = 0;
      intensities.push_back(cm_iter->getIntensity());
      qualities.push_back(cm_iter->getQuality());
      for (ConsensusFeature::HandleSetType::const_iterator hs_iter = cm_iter->begin(); hs_iter != cm_iter->end(); ++hs_iter)
      {
        double rt_diff = hs_iter->getRT() - cm_iter->getRT();
        rt_delta_by_elems.push_back(rt_diff);
        if (rt_diff < 0)
        {
          rt_diff = -rt_diff;
        }
        rt_aad_by_elems.push_back(rt_diff);
        rt_aad += rt_diff;

        double mz_diff = hs_iter->getMZ() - cm_iter->getMZ();
        mz_delta_by_elems.push_back(mz_diff);
        if (mz_diff < 0)
        {
          mz_diff = -mz_diff;
        }
        mz_aad_by_elems.push_back(mz_diff);
        mz_aad += mz_diff;

        // a consensus intensity of 0 is replaced by 1 to avoid dividing by zero
        double it_ratio = hs_iter->getIntensity() / (cm_iter->getIntensity() ? cm_iter->getIntensity() : 1.);
        it_delta_by_elems.push_back(it_ratio);
        if (it_ratio < 1.)
        {
          it_ratio = 1. / it_ratio;
        }
        it_aad_by_elems.push_back(it_ratio);
        it_aad += it_ratio;
      }
      if (!cm_iter->empty())
      {
        rt_aad /= cm_iter->size();
        mz_aad /= cm_iter->size();
        it_aad /= cm_iter->size();
      } // otherwise rt_aad etc. are 0 anyway
      rt_aad_by_cfs.push_back(rt_aad);
      mz_aad_by_cfs.push_back(mz_aad);
      it_aad_by_cfs.push_back(it_aad);
    }

    OpenMS::SomeStatistics some_statistics;

    os.precision(writtenDigits(ConsensusFeature::IntensityType()));
    os << "Intensities of consensus features:" << endl << some_statistics(intensities) << endl;

    os.precision(writtenDigits(ConsensusFeature::QualityType()));
    os << "Qualities of consensus features:" << endl << some_statistics(qualities) << endl;

    os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
    os << "Retention time differences ( element-center, weight 1 per element):" << endl << some_statistics(rt_delta_by_elems) << endl;
    os << "Absolute retention time differences ( |element-center|, weight 1 per element):" << endl << some_statistics(rt_aad_by_elems) << endl;
    os << "Average absolute differences of retention time within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(rt_aad_by_cfs) << endl;

    os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
    os << "Mass-to-charge differences ( element-center, weight 1 per element):" << endl << some_statistics(mz_delta_by_elems) << endl;
    os << "Absolute differences of mass-to-charge ( |element-center|, weight 1 per element):" << endl << some_statistics(mz_aad_by_elems) << endl;
    os << "Average absolute differences of mass-to-charge within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(mz_aad_by_cfs) << endl;

    os.precision(writtenDigits(ConsensusFeature::IntensityType()));
    os << "Intensity ratios ( element/center, weight 1 per element):" << endl << some_statistics(it_delta_by_elems) << endl;
    os << "Relative intensity error ( max{(element/center),(center/element)}, weight 1 per element):" << endl << some_statistics(it_aad_by_elems) << endl;
    os << "Average relative intensity error within consensus features ( max{(element/center),(center/element)}, weight 1 per consensus features):" << endl << some_statistics(it_aad_by_cfs) << endl;
  }

  return EXECUTION_OK;
}
// Corrects precursors whose mono-isotopic mass was presumably assigned wrongly, using a feature map.
//
// A wrong assignment is assumed:
// - if precursor_mz matches the mz of a non-monoisotopic feature mass trace
// - and, in the case that believe_charge is true, if feature_charge matches the precursor_charge
//
// Options for the correction:
// - keep_original: a copy of the tandem spectrum with the corrected precursor is appended to exp
//   for each selected feature; the original spectrum is retained unchanged
// - all_matching_features: do not reduce the candidates to the feature nearest in RT
//
// Parameters:
// - features: candidate features
// - exp: experiment whose MS2 precursors are corrected (modified in place)
// - rt_tolerance_s: forwarded to overlaps_()
// - mz_tolerance: m/z tolerance; interpreted as ppm of the precursor m/z if ppm is true, as absolute value otherwise
// - max_trace: forwarded to compatible_()
//
// Returns the indices of all spectra that were corrected (or, with keep_original, duplicated).
set<Size> correctToNearestFeature(const FeatureMap& features, PeakMap & exp, double rt_tolerance_s = 0.0, double mz_tolerance = 0.0, bool ppm = true, bool believe_charge = false, bool keep_original = false, bool all_matching_features = false, int max_trace = 2)
{
  typedef map<Size, set<Size> > CandidateMap;

  set<Size> corrected_precursors;

  // Step 1: for each precursor/MS2 collect all features that overlap with it.
  // If believe_charge is set, only features with the precursor charge are considered.
  CandidateMap scan_idx_to_feature_idx;
  for (Size scan = 0; scan != exp.size(); ++scan)
  {
    // only tandem mass spectra with a precursor are of interest
    const bool is_tandem = (exp[scan].getMSLevel() == 2) && !exp[scan].getPrecursors().empty();
    if (!is_tandem)
    {
      continue;
    }

    const double pc_mz = exp[scan].getPrecursors()[0].getMZ();
    const double rt = exp[scan].getRT();
    const int pc_charge = exp[scan].getPrecursors()[0].getCharge();

    for (Size f = 0; f != features.size(); ++f)
    {
      const bool charge_ok = !believe_charge || features[f].getCharge() == pc_charge;
      if (charge_ok && overlaps_(features[f], rt, pc_mz, rt_tolerance_s))
      {
        scan_idx_to_feature_idx[scan].insert(f);
      }
    }
  }

  // Step 2: retain only the features that compatible_() accepts for the precursor m/z.
  for (CandidateMap::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
  {
    const double pc_mz = exp[it->first].getPrecursors()[0].getMZ();
    const double mz_tolerance_da = ppm ? pc_mz * mz_tolerance * 1e-6 : mz_tolerance;

    set<Size> compatible_features;
    for (set<Size>::const_iterator sit = it->second.begin(); sit != it->second.end(); ++sit)
    {
      if (compatible_(features[*sit], pc_mz, mz_tolerance_da, max_trace))
      {
        compatible_features.insert(*sit);
      }
    }
    it->second.swap(compatible_features);
  }

  // Step 3: drop the scans that are left without any compatible feature.
  for (CandidateMap::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); )
  {
    CandidateMap::iterator current = it;
    ++it; // advance first, erasing invalidates only the erased iterator
    if (current->second.empty())
    {
      scan_idx_to_feature_idx.erase(current);
    }
  }

  if (debug_level_ > 0)
  {
    LOG_INFO << "Number of precursors with compatible features: " << scan_idx_to_feature_idx.size() << endl;
  }

  // Step 4: unless all matching features are requested, keep only the feature nearest in RT.
  if (!all_matching_features)
  {
    for (CandidateMap::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
    {
      set<Size>& candidates = it->second; // never empty, see step 3
      const double pc_rt = exp[it->first].getRT();

      Size nearest_feature = *candidates.begin();
      double min_distance = 1e16;
      for (set<Size>::const_iterator sit = candidates.begin(); sit != candidates.end(); ++sit)
      {
        const double current_distance = fabs(pc_rt - features[*sit].getRT());
        if (current_distance < min_distance)
        {
          min_distance = current_distance;
          nearest_feature = *sit;
        }
      }

      candidates.clear();
      candidates.insert(nearest_feature);
    }
  }

  // Step 5: the sets now hold either the nearest or all compatible features.
  // Depending on keep_original, copy or overwrite the precursor information.
  if (keep_original)
  {
    // append one corrected copy of the spectrum per feature in the set
    for (CandidateMap::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
    {
      const Size scan = it->first;
      MSSpectrum<> spectrum = exp[scan];
      corrected_precursors.insert(scan);
      for (set<Size>::iterator f_it = it->second.begin(); f_it != it->second.end(); ++f_it)
      {
        const Size feature_idx = *f_it;
        spectrum.getPrecursors()[0].setMZ(features[feature_idx].getMZ());
        spectrum.getPrecursors()[0].setCharge(features[feature_idx].getCharge());
        exp.addSpectrum(spectrum);
      }
    }
  }
  else
  {
    // overwrite precursor m/z and charge with those of the first feature in the set
    for (CandidateMap::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
    {
      const Size scan = it->first;
      const Size feature_idx = *it->second.begin();
      exp[scan].getPrecursors()[0].setMZ(features[feature_idx].getMZ());
      exp[scan].getPrecursors()[0].setCharge(features[feature_idx].getCharge());
      corrected_precursors.insert(scan);
    }
  }

  return corrected_precursors;
}
void AbsoluteQuantitation::quantifyComponents(FeatureMap& unknowns) { //Potential Optimizations: create a map for each unknown FeatureMap // to reduce multiple loops // initialize all other variables Feature empty_feature; size_t IS_component_it, IS_component_group_it; // // iterate through the unknowns // for (size_t i = 0; i < unknowns.size(); i++) // { // iterate through each component_group/feature for (size_t feature_it = 0; feature_it < unknowns.size(); ++feature_it) { String component_group_name = (String)unknowns[feature_it].getMetaValue("PeptideRef"); Feature unknowns_quant_feature; // iterate through each component/sub-feature for (size_t sub_it = 0; sub_it < unknowns[feature_it].getSubordinates().size(); ++sub_it) { String component_name = (String)unknowns[feature_it].getSubordinates()[sub_it].getMetaValue("native_id"); // apply the calibration curve to components that are in the quant_method if (quant_methods_.count(component_name)>0) { double calculated_concentration = 0.0; std::map<String,AbsoluteQuantitationMethod>::iterator quant_methods_it = quant_methods_.find(component_name); String quant_component_name = quant_methods_it->second.getComponentName(); String quant_IS_component_name = quant_methods_it->second.getISName(); String quant_feature_name = quant_methods_it->second.getFeatureName(); if (quant_IS_component_name != "") { // look up the internal standard for the component bool IS_found = false; // Optimization: 90% of the IS will be in the same component_group/feature for (size_t is_sub_it = 0; is_sub_it < unknowns[feature_it].getSubordinates().size(); ++is_sub_it) { String IS_component_name = (String)unknowns[feature_it].getSubordinates()[is_sub_it].getMetaValue("native_id"); if (quant_IS_component_name == IS_component_name) { IS_found = true; IS_component_group_it = feature_it; IS_component_it = is_sub_it; break; } } if (!IS_found) {// expand IS search to all components // iterate through each component_group/feature for (size_t 
is_feature_it = 0; is_feature_it < unknowns.size(); ++is_feature_it) { //iterate through each component/sub-feature for (size_t is_sub_it = 0; is_sub_it < unknowns[is_feature_it].getSubordinates().size(); ++is_sub_it) { String IS_component_name = (String)unknowns[is_feature_it].getSubordinates()[is_sub_it].getMetaValue("native_id"); if (quant_IS_component_name == IS_component_name) { IS_found = true; IS_component_group_it = is_feature_it; IS_component_it = is_sub_it; break; } } if (IS_found) { break; } } } if (IS_found) { String transformation_model = quant_methods_it->second.getTransformationModel(); Param transformation_model_params = quant_methods_it->second.getTransformationModelParams(); calculated_concentration = applyCalibration( unknowns[feature_it].getSubordinates()[sub_it], unknowns[IS_component_group_it].getSubordinates()[IS_component_it], quant_feature_name,transformation_model,transformation_model_params); } else { LOG_INFO << "Component " << component_name << " IS " << quant_IS_component_name << " was not found."; LOG_INFO << "No concentration will be calculated."; } } else { String transformation_model = quant_methods_it->second.getTransformationModel(); Param transformation_model_params = quant_methods_it->second.getTransformationModelParams(); calculated_concentration = applyCalibration( unknowns[feature_it].getSubordinates()[sub_it], empty_feature, quant_feature_name,transformation_model,transformation_model_params); } // add new metadata (calculated_concentration, concentration_units) to the component unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("calculated_concentration",calculated_concentration); String concentration_units = quant_methods_it->second.getConcentrationUnits(); unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("concentration_units",concentration_units); // calculate the bias? 
} else { LOG_INFO << "Component " << component_name << " does not have a quantitation method."; LOG_INFO << "No concentration will be calculated."; unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("calculated_concentration",""); unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("concentration_units",""); } } } // } }
/**
  @brief Shared driver for feature grouping: loads the inputs, runs @p algorithm and stores a consensusXML.

  Reads "in" (a single string when @p labeled is set, a string list otherwise) and "out".
  All inputs must have the same file type as the first one. featureXML inputs are loaded
  without convex hulls and subordinates, stripped of meta info and converted to consensus
  maps; every other type is loaded through ConsensusXMLFile. After grouping, unique ids,
  data-processing info and the collected primary MS run paths are written to the output map,
  which is stored to "out". Finally, the number of consensus features per size is logged.

  @param algorithm Grouping algorithm; receives the "algorithm:" parameter subtree.
  @param labeled   If true, the single input is described twice ("light" / "heavy").
  @return ILLEGAL_PARAMETERS if no input is given or the input file types differ,
          EXECUTION_OK otherwise.
*/
ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm, bool labeled = false)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList ins;
  if (labeled)
  {
    ins.push_back(getStringOption_("in"));
  }
  else
  {
    ins = getStringList_("in");
  }
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // check for valid input
  //-------------------------------------------------------------
  // ins[0] is read below, so an empty list must be rejected first
  if (ins.empty())
  {
    writeLog_("Error: No input files given!");
    return ILLEGAL_PARAMETERS;
  }

  // check if all input files have the correct type
  FileTypes::Type file_type = FileHandler::getType(ins[0]);
  for (Size i = 0; i < ins.size(); ++i)
  {
    if (FileHandler::getType(ins[i]) != file_type)
    {
      writeLog_("Error: All input files must be of the same type!");
      return ILLEGAL_PARAMETERS;
    }
  }

  //-------------------------------------------------------------
  // set up algorithm
  //-------------------------------------------------------------
  Param algorithm_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used algorithm parameters", algorithm_param, 3);
  algorithm->setParameters(algorithm_param);

  //-------------------------------------------------------------
  // perform grouping
  //-------------------------------------------------------------
  // load input
  ConsensusMap out_map;
  StringList ms_run_locations;
  if (file_type == FileTypes::FEATUREXML)
  {
    vector<ConsensusMap > maps(ins.size());
    FeatureXMLFile f;
    FeatureFileOptions param = f.getOptions();
    // to save memory don't load convex hulls and subordinates
    param.setLoadSubordinates(false);
    param.setLoadConvexHull(false);
    f.setOptions(param);

    Size progress = 0;
    setLogType(ProgressLogger::CMD);
    startProgress(0, ins.size(), "reading input");
    for (Size i = 0; i < ins.size(); ++i)
    {
      FeatureMap tmp;
      f.load(ins[i], tmp);
      out_map.getFileDescriptions()[i].filename = ins[i];
      out_map.getFileDescriptions()[i].size = tmp.size();
      out_map.getFileDescriptions()[i].unique_id = tmp.getUniqueId();

      // copy over information on the primary MS run
      const StringList& ms_runs = tmp.getPrimaryMSRunPath();
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

      // to save memory, remove convex hulls, subordinates:
      for (FeatureMap::Iterator it = tmp.begin(); it != tmp.end(); ++it)
      {
        it->getSubordinates().clear();
        it->getConvexHulls().clear();
        it->clearMetaInfo();
      }

      MapConversion::convert(i, tmp, maps[i]);
      maps[i].updateRanges();
      setProgress(progress++);
    }
    endProgress();

    // exception for "labeled" algorithms: copy file descriptions
    if (labeled)
    {
      out_map.getFileDescriptions()[1] = out_map.getFileDescriptions()[0];
      out_map.getFileDescriptions()[0].label = "light";
      out_map.getFileDescriptions()[1].label = "heavy";
    }

    // group
    algorithm->group(maps, out_map);
  }
  else
  {
    vector<ConsensusMap> maps(ins.size());
    ConsensusXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      f.load(ins[i], maps[i]);
      maps[i].updateRanges();

      // copy over information on the primary MS run
      const StringList& ms_runs = maps[i].getPrimaryMSRunPath();
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
    }

    // group
    algorithm->group(maps, out_map);

    // set file descriptions:
    bool keep_subelements = getFlag_("keep_subelements");
    if (!keep_subelements)
    {
      for (Size i = 0; i < ins.size(); ++i)
      {
        out_map.getFileDescriptions()[i].filename = ins[i];
        out_map.getFileDescriptions()[i].size = maps[i].size();
        out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
      }
    }
    else
    {
      // components of the output map are not the input maps themselves, but
      // the components of the input maps:
      algorithm->transferSubelements(maps, out_map);
    }
  }

  // assign unique ids
  out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // annotate output with data processing info
  addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

  // set primary MS runs
  out_map.setPrimaryMSRunPath(ms_run_locations);

  // write output
  ConsensusXMLFile().store(out, out_map);

  // some statistics: histogram of consensus feature sizes, largest size first
  map<Size, UInt> num_consfeat_of_size;
  for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit)
  {
    ++num_consfeat_of_size[cmit->size()];
  }

  LOG_INFO << "Number of consensus features:" << endl;
  for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
  {
    LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
  }
  LOG_INFO << " total: " << setw(6) << out_map.size() << endl;

  return EXECUTION_OK;
}
START_SECTION((FeatureMap& operator = (const FeatureMap& rhs))) FeatureMap<> map1; map1.push_back(feature1); map1.push_back(feature2); map1.push_back(feature3); map1.updateRanges(); map1.setIdentifier("lsid"); map1.getDataProcessing().resize(1); map1.getProteinIdentifications().resize(1); map1.getUnassignedPeptideIdentifications().resize(1); //assignment FeatureMap<> map2; map2 = map1; TEST_EQUAL(map2.size(),3); TEST_REAL_SIMILAR(map2.getMaxInt(),1.0) TEST_STRING_EQUAL(map2.getIdentifier(),"lsid") TEST_EQUAL(map2.getDataProcessing().size(),1) TEST_EQUAL(map2.getProteinIdentifications().size(),1); TEST_EQUAL(map2.getUnassignedPeptideIdentifications().size(),1); //assignment of empty object map2 = FeatureMap<>(); TEST_EQUAL(map2.size(),0); TEST_REAL_SIMILAR(map2.getMinInt(), numeric_limits<DoubleReal>::max()) TEST_REAL_SIMILAR(map2.getMaxInt(), -numeric_limits<DoubleReal>::max()) TEST_STRING_EQUAL(map2.getIdentifier(),"") TEST_EQUAL(map2.getDataProcessing().size(),0) TEST_EQUAL(map2.getProteinIdentifications().size(),0);
/**
  @brief Tool entry point: loads MS level 1 spectra from "in", runs the FFSH feature finder
         and stores the resulting feature map to "out".

  @return INCOMPATIBLE_INPUT_DATA if no spectra were loaded or a spectrum is not sorted;
          EXECUTION_OK otherwise.
*/
ExitCodes main_(int, const char **)
{
  // --- parameters ---------------------------------------------------------
  const String input_path = getStringOption_("in");
  const String output_path = getStringOption_("out");

  // --- load input (MS level 1 only) ---------------------------------------
  MSExperiment<Peak1D> spectra;
  MzMLFile reader;
  reader.setLogType(log_type_);
  reader.getOptions().addMSLevel(1);
  reader.load(input_path, spectra);

  if (spectra.empty())
  {
    LOG_WARN << "The given file does not contain any conventional peak data, but might"
                " contain chromatograms. This tool currently cannot handle them, sorry.";
    return INCOMPATIBLE_INPUT_DATA;
  }

  // every spectrum has to be sorted; bail out on the first one that is not
  for (Size s = 0; s < spectra.size(); ++s)
  {
    if (spectra[s].isSorted())
    {
      continue;
    }
    writeLog_("Error: Not all spectra are sorted according to peak m/z positions. Use FileFilter to sort the input!");
    return INCOMPATIBLE_INPUT_DATA;
  }

  // --- feature detection --------------------------------------------------
  FeatureMap<> found_features;
  FeatureFinder finder;
  FFSH algorithm;
  algorithm.setParameters(getParam_().copy("algorithm:", true));
  algorithm.setData(spectra, found_features, finder);
  algorithm.run();

  // --- write output -------------------------------------------------------
  // annotate output with data processing info
  addDataProcessing_(found_features, getProcessingInfo_(DataProcessing::PEAK_PICKING));
  addDataProcessing_(found_features, getProcessingInfo_(DataProcessing::QUANTITATION));

  // the map itself and each contained feature need a unique id before storing
  found_features.ensureUniqueId();
  for (FeatureMap<>::Iterator feature = found_features.begin(); feature != found_features.end(); ++feature)
  {
    feature->ensureUniqueId();
  }

  FeatureXMLFile().store(output_path, found_features);
  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { vector<ProteinIdentification> prot_ids; vector<PeptideIdentification> pep_ids; ProteinHit temp_protein_hit; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String inputfile_id = getStringOption_("id"); String inputfile_feature = getStringOption_("feature"); String inputfile_consensus = getStringOption_("consensus"); String inputfile_raw = getStringOption_("in"); String outputfile_name = getStringOption_("out"); //~ bool Ms1(getFlag_("MS1")); //~ bool Ms2(getFlag_("MS2")); bool remove_duplicate_features(getFlag_("remove_duplicate_features")); //------------------------------------------------------------- // fetch vocabularies //------------------------------------------------------------ ControlledVocabulary cv; cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo")); cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo")); QcMLFile qcmlfile; //------------------------------------------------------------- // MS aqiusition //------------------------------------------------------------ String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName(); cout << "Reading mzML file..." 
<< endl; MzMLFile mz_data_file; MSExperiment<Peak1D> exp; MzMLFile().load(inputfile_raw, exp); //---prep input exp.sortSpectra(); UInt min_mz = std::numeric_limits<UInt>::max(); UInt max_mz = 0; std::map<Size, UInt> mslevelcounts; qcmlfile.registerRun(base_name,base_name); //TODO use UIDs //---base MS aquisition qp String msaq_ref = base_name + "_msaq"; QcMLFile::QualityParameter qp; qp.id = msaq_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000004"; try { //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details"); //~ cout << test.name << test.id << endl; const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004"); qp.name = term.name; ///< Name } catch (...) { qp.name = "mzML file"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //---file origin qp qp = QcMLFile::QualityParameter(); qp.name = "mzML file"; ///< Name qp.id = base_name + "_run_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000577"; qp.value = base_name; qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "instrument model"; ///< Name qp.id = base_name + "_instrument_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000031"; qp.value = exp.getInstrument().getName(); qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "completion time"; ///< Name qp.id = base_name + "_date"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000747"; qp.value = exp.getDateTime().getDate(); qcmlfile.addRunQualityParameter(base_name, qp); //---precursors at QcMLFile::Attachment at; at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000044"; at.qualityRef = msaq_ref; at.id = base_name + "_precursors"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch 
(...) { at.name = "precursors"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); //RT at.colTypes.push_back("MS:1000040"); //MZ for (Size i = 0; i < exp.size(); ++i) { mslevelcounts[exp[i].getMSLevel()]++; if (exp[i].getMSLevel() == 2) { if (exp[i].getPrecursors().front().getMZ() < min_mz) { min_mz = exp[i].getPrecursors().front().getMZ(); } if (exp[i].getPrecursors().front().getMZ() > max_mz) { max_mz = exp[i].getPrecursors().front().getMZ(); } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(exp[i].getPrecursors().front().getMZ()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); //---aquisition results qp qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms1aquisition"; ///< Identifier qp.value = String(mslevelcounts[1]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms1 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms2aquisition"; ///< Identifier qp.value = String(mslevelcounts[2]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms2 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results" qp.id = base_name + "_Chromaquisition"; ///< Identifier qp.value = String(exp.getChromatograms().size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of chromatograms"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000009"; at.qualityRef = msaq_ref; at.id = base_name + "_mzrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS MZ aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000010"); //MZ at.colTypes.push_back("QC:0000011"); //MZ std::vector<String> rowmz; rowmz.push_back(String(min_mz)); rowmz.push_back(String(max_mz)); at.tableRows.push_back(rowmz); qcmlfile.addRunAttachment(base_name, at); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000012"; at.qualityRef = msaq_ref; at.id = base_name + "_rtrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS RT aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000013"); //MZ at.colTypes.push_back("QC:0000014"); //MZ std::vector<String> rowrt; rowrt.push_back(String(exp.begin()->getRT())); rowrt.push_back(String(exp.getSpectra().back().getRT())); at.tableRows.push_back(rowrt); qcmlfile.addRunAttachment(base_name, at); //---ion current stability ( & tic ) qp at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000022"; at.qualityRef = msaq_ref; at.id = base_name + "_tics"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "MS TICs"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); at.colTypes.push_back("MS:1000285"); UInt max = 0; Size below_10k = 0; for (Size i = 0; i < exp.size(); ++i) { if (exp[i].getMSLevel() == 1) { UInt sum = 0; for (Size j = 0; j < exp[i].size(); ++j) { sum += exp[i][j].getIntensity(); } if (sum > max) { max = sum; } if (sum < 10000) { ++below_10k; } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(sum); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.id = base_name + "_ticslump"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000023"; qp.value = String((100 / exp.size()) * below_10k); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "percentage of tic slumps"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //------------------------------------------------------------- // MS id //------------------------------------------------------------ if (inputfile_id != "") { IdXMLFile().load(inputfile_id, prot_ids, pep_ids); cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl; ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters(); vector<String> var_mods = params.variable_modifications; //~ boost::regex re("(?<=[KR])(?=[^P])"); String msid_ref = base_name + "_msid"; QcMLFile::QualityParameter qp; qp.id = msid_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000025"; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "MS identification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000026"; at.qualityRef = msid_ref; at.id = base_name + "_idsetting"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS id settings"; ///< Name } at.colTypes.push_back("MS:1001013"); //MS:1001013 db name MS:1001016 version MS:1001020 taxonomy at.colTypes.push_back("MS:1001016"); at.colTypes.push_back("MS:1001020"); std::vector<String> row; row.push_back(String(prot_ids.front().getSearchParameters().db)); row.push_back(String(prot_ids.front().getSearchParameters().db_version)); row.push_back(String(prot_ids.front().getSearchParameters().taxonomy)); at.tableRows.push_back(row); qcmlfile.addRunAttachment(base_name, at); UInt spectrum_count = 0; Size peptide_hit_count = 0; UInt runs_count = 0; Size protein_hit_count = 0; set<String> peptides; set<String> proteins; Size missedcleavages = 0; for (Size i = 0; i < pep_ids.size(); ++i) { if (!pep_ids[i].empty()) { ++spectrum_count; peptide_hit_count += pep_ids[i].getHits().size(); const vector<PeptideHit>& temp_hits = pep_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { peptides.insert(temp_hits[j].getSequence().toString()); } } } for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it) { for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st) { if (*st == 'K' || *st == 'R') { ++missedcleavages; } } } for (Size i = 0; i < prot_ids.size(); ++i) { ++runs_count; protein_hit_count += prot_ids[i].getHits().size(); const vector<ProteinHit>& temp_hits = prot_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { proteins.insert(temp_hits[j].getAccession()); } } qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000037"; ///< cv accession qp.id = base_name 
+ "_misscleave"; ///< Identifier qp.value = missedcleavages; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of missed cleavages"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000032"; ///< cv accession qp.id = base_name + "_totprot"; ///< Identifier qp.value = protein_hit_count; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000033"; ///< cv accession qp.id = base_name + "_totuniqprot"; ///< Identifier qp.value = String(proteins.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000029"; ///< cv accession qp.id = base_name + "_psms"; ///< Identifier qp.value = String(spectrum_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of PSM"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000030"; ///< cv accession qp.id = base_name + "_totpeps"; ///< Identifier qp.value = String(peptide_hit_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "total number of identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000031"; ///< cv accession qp.id = base_name + "_totuniqpeps"; ///< Identifier qp.value = String(peptides.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000038"; at.qualityRef = msid_ref; at.id = base_name + "_massacc"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "delta ppm tables"; } //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M) at.colTypes.push_back("RT"); at.colTypes.push_back("MZ"); at.colTypes.push_back("Score"); at.colTypes.push_back("PeptideSequence"); at.colTypes.push_back("Charge"); at.colTypes.push_back("TheoreticalWeight"); at.colTypes.push_back("delta_ppm"); for (UInt w = 0; w < var_mods.size(); ++w) { at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_')); } std::vector<double> deltas; //~ prot_ids[0].getSearchParameters(); for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it) { if (it->getHits().size() > 0) { std::vector<String> row; row.push_back(it->getRT()); row.push_back(it->getMZ()); PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort vector<UInt> pep_mods; for (UInt w = 0; w < var_mods.size(); ++w) { pep_mods.push_back(0); } for (AASequence::ConstIterator z = tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z) { Residue res = *z; String temp; if 
(res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl") { temp = res.getModification() + " (" + res.getOneLetterCode() + ")"; //cout<<res.getModification()<<endl; for (UInt w = 0; w < var_mods.size(); ++w) { if (temp == var_mods[w]) { //cout<<temp; pep_mods[w] += 1; } } } } row.push_back(tmp.getScore()); row.push_back(tmp.getSequence().toString().removeWhitespaces()); row.push_back(tmp.getCharge()); row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge())); double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true)); row.push_back(String(dppm)); deltas.push_back(dppm); for (UInt w = 0; w < var_mods.size(); ++w) { row.push_back(pep_mods[w]); } at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000040"; ///< cv accession qp.id = base_name + "_mean_delta"; ///< Identifier qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end())); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "mean delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000041"; ///< cv accession qp.id = base_name + "_median_delta"; ///< Identifier qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false)); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "median delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000035"; ///< cv accession qp.id = base_name + "_ratio_id"; ///< Identifier qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2])); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "id ratio"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } //------------------------------------------------------------- // MS quantitation //------------------------------------------------------------ FeatureMap map; String msqu_ref = base_name + "_msqu"; if (inputfile_feature != "") { FeatureXMLFile f; f.load(inputfile_feature, map); cout << "Read featureXML file..." << endl; //~ UInt fiter = 0; map.sortByRT(); map.updateRanges(); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000045"; ///< cv accession qp.id = msqu_ref; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "MS quantification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000046"; ///< cv accession qp.id = base_name + "_feature_count"; ///< Identifier qp.value = String(map.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of features"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } if (inputfile_feature != "" && !remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); at.colTypes.push_back("Quality"); at.colTypes.push_back("FWHM"); at.colTypes.push_back("IDs"); UInt fiter = 0; map.sortByRT(); //ofstream out(outputfile_name.c_str()); while (fiter < map.size()) { std::vector<String> row; row.push_back(map[fiter].getMZ()); row.push_back(map[fiter].getRT()); row.push_back(map[fiter].getIntensity()); row.push_back(map[fiter].getCharge()); row.push_back(map[fiter].getOverallQuality()); row.push_back(map[fiter].getWidth()); row.push_back(map[fiter].getPeptideIdentifications().size()); fiter++; at.tableRows.push_back(row); } qcmlfile.addRunAttachment(base_name, at); } else if (inputfile_feature != "" && remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); FeatureMap map, map_out; FeatureXMLFile f; f.load(inputfile_feature, map); UInt fiter = 0; map.sortByRT(); while (fiter < map.size()) { FeatureMap map_tmp; for (UInt k = fiter; k <= map.size(); ++k) { if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1) { //~ cout << fiter << endl; map_tmp.push_back(map[k]); } else { fiter = k; break; } } map_tmp.sortByMZ(); UInt retif = 1; map_out.push_back(map_tmp[0]); while (retif < map_tmp.size()) { if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01) { cout << "equal RT, but mass different" << endl; map_out.push_back(map_tmp[retif]); } retif++; } } qcmlfile.addRunAttachment(base_name, at); } if (inputfile_consensus != "") { cout << "Reading consensusXML file..." << endl; ConsensusXMLFile f; ConsensusMap map; f.load(inputfile_consensus, map); //~ String CONSENSUS_NAME = "_consensus.tsv"; //~ String combined_out = outputfile_name + CONSENSUS_NAME; //~ ofstream out(combined_out.c_str()); at = QcMLFile::Attachment(); qp.name = "consensuspoints"; ///< Name //~ qp.id = base_name + "_consensuses"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results" at.colTypes.push_back("Native_spectrum_ID"); at.colTypes.push_back("DECON_RT_(sec)"); at.colTypes.push_back("DECON_MZ_(Th)"); at.colTypes.push_back("DECON_Intensity"); at.colTypes.push_back("Feature_RT_(sec)"); at.colTypes.push_back("Feature_MZ_(Th)"); at.colTypes.push_back("Feature_Intensity"); at.colTypes.push_back("Feature_Charge"); for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit) { const ConsensusFeature& CF = *cmit; for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit) { std::vector<String> row; FeatureHandle FH = *cfit; row.push_back(CF.getMetaValue("spectrum_native_id")); 
row.push_back(CF.getRT()); row.push_back(CF.getMZ()); row.push_back(CF.getIntensity()); row.push_back(FH.getRT()); row.push_back(FH.getMZ()); row.push_back(FH.getCharge()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); } //------------------------------------------------------------- // finalize //------------------------------------------------------------ qcmlfile.store(outputfile_name); return EXECUTION_OK; }
/// Reports how many entries feat_map currently holds.
size_t size()
{
  const size_t feature_count = feat_map.size();
  return feature_count;
}
/**
  @brief Compares the feature pairs of a truth consensus map with an input feature map.

  Loads the feature map named by option "in" and the consensus map named by
  option "truth". Each truth consensus feature must hold exactly two elements;
  for each element the best-scoring input feature inside the RT tolerance
  ("rt_tol") and the charge-scaled m/z tolerance ("mz_tol" / charge) is
  searched. Pairs are counted as found (both elements matched), half found
  (exactly one element matched) or not found, and the summary plus ratio and
  distance statistics of the found pairs are written to stdout.

  Truth entries that do not hold exactly two elements are reported on stderr,
  skipped, and therefore end up in the "not found" count.

  @return EXECUTION_OK once the report has been printed.
*/
ExitCodes main_(int, const char **)
{
  //load input features
  FeatureMap input;
  FeatureXMLFile().load(getStringOption_("in"), input);

  //load truth consensusXML
  ConsensusMap truth;
  ConsensusXMLFile().load(getStringOption_("truth"), truth);

  //parameters
  double mz_tol = getDoubleOption_("mz_tol");
  double rt_tol = getDoubleOption_("rt_tol");

  //seek manual feature in automatic feature map
  UInt matched_pairs = 0;
  UInt half_matched_pairs = 0;
  vector<double> t_ratio, i_ratio, rt_diffs, mz_diffs;
  for (Size t = 0; t < truth.size(); ++t)
  {
    if (truth[t].size() != 2)
    {
      cerr << "Error: consensus feature must contain exactly two elements!" << endl;
      continue;
    }

    vector<Feature> best_matches(2);
    vector<UInt> match_counts(2, 0);
    vector<Peak2D> elements(2);
    elements[0] = *(truth[t].getFeatures().begin());
    elements[1] = *(++(truth[t].getFeatures().begin()));

    // NOTE(review): a charge of 0 makes this tolerance infinite and a negative
    // charge makes it negative (nothing can match) -- confirm that the truth
    // map always carries a positive charge.
    double mz_tol_charged = mz_tol / truth[t].getCharge();

    for (Size e = 0; e < 2; ++e)
    {
      double best_score = 0.0;
      for (Size i = 0; i < input.size(); ++i)
      {
        const Feature & f_i = input[i];
        const double rt_delta = fabs(f_i.getRT() - elements[e].getRT());
        const double mz_delta = fabs(f_i.getMZ() - elements[e].getMZ());
        if (rt_delta < rt_tol && mz_delta < mz_tol_charged)
        {
          ++match_counts[e];
          // score is 1 for a perfect hit and approaches 0 at the tolerance border
          double score = (1.0 - mz_delta / mz_tol_charged) * (1.0 - rt_delta / rt_tol);
          if (score > best_score)
          {
            best_score = score;
            best_matches[e] = f_i;
          }
        }
      }
    }

    //not matched
    if (match_counts[0] == 0 && match_counts[1] == 0)
    {
      // nothing to record; the pair is counted as "not found" in the summary
    }
    //half matched
    else if ((match_counts[0] > 0 && match_counts[1] == 0) || (match_counts[0] == 0 && match_counts[1] > 0))
    {
      ++half_matched_pairs;
    }
    //matched
    else
    {
      ++matched_pairs;
      // intensity ratio of the two best-matching input features
      double a_r = best_matches[0].getIntensity() / best_matches[1].getIntensity();
      t_ratio.push_back(a_r);
      // intensity ratio of the two truth elements
      double m_r = elements[0].getIntensity() / elements[1].getIntensity();
      i_ratio.push_back(m_r);
      rt_diffs.push_back(best_matches[1].getRT() - best_matches[0].getRT());
      mz_diffs.push_back((best_matches[1].getMZ() - best_matches[0].getMZ()) * truth[t].getCharge());
    }
  }

  // Percentages are relative to the number of truth pairs. With an empty truth
  // map the divisions below would be 0/0 and yield NaN, so report 0% instead.
  double found_percent = 0.0;
  double half_found_percent = 0.0;
  double not_found_percent = 0.0;
  if (truth.size() > 0)
  {
    found_percent = 100.0 * matched_pairs / truth.size();
    half_found_percent = 100.0 * half_matched_pairs / truth.size();
    not_found_percent = 100.0 - 100.0 * (matched_pairs + half_matched_pairs) / truth.size();
  }

  cout << endl;
  cout << "pair detection statistics:" << endl;
  cout << "==========================" << endl;
  cout << "truth pairs: " << truth.size() << endl;
  cout << "input features: " << input.size() << endl;
  cout << endl;
  cout << "found: " << matched_pairs << " (" << String::number(found_percent, 2) << "%)" << endl;
  cout << "half found : " << half_matched_pairs << " (" << String::number(half_found_percent, 2) << "%)" << endl;
  cout << "not found : " << truth.size() - (matched_pairs + half_matched_pairs) << " (" << String::number(not_found_percent, 2) << "%)" << endl;
  cout << endl;
  // NOTE(review): all four vectors are empty when no pair was fully matched --
  // confirm that fiveNumberQuotients / fiveNumbers cope with empty input.
  cout << "relative pair ratios: " << fiveNumberQuotients(i_ratio, t_ratio, 3) << endl;
  cout << "pair distance RT : " << fiveNumbers(rt_diffs, 2) << endl;
  cout << "pair distance m/z: " << fiveNumbers(mz_diffs, 2) << endl;

  return EXECUTION_OK;
}
input.updateRanges(1); FeatureMap<> output; //parameters Param param; //param.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderAlgorithmPicked.ini")); param = param.copy("FeatureFinder:1:algorithm:",true); //Dummy featurefinder FeatureFinder ff; FFSH ffsh; ffsh.setParameters(param); ffsh.setData(input, output, ff); ffsh.run(); TEST_EQUAL(output.size(), 384); //TOLERANCE_ABSOLUTE(0.001); //TEST_REAL_SIMILAR(output[0].getOverallQuality(),0.8819); //TEST_REAL_SIMILAR(output[1].getOverallQuality(),0.8673); // ... //TOLERANCE_ABSOLUTE(20.0); TEST_REAL_SIMILAR(output[0].getIntensity(),20829); TEST_REAL_SIMILAR(output[1].getIntensity(),56818.6); // ... TEST_REAL_SIMILAR(output[0].getMZ(),300.060882568359); TEST_REAL_SIMILAR(output[1].getMZ(),300.060882568359); TEST_REAL_SIMILAR(output[0].getRT(),35.1000317866759);
} END_SECTION START_SECTION(virtual ~KroenikFile()) { delete ptr; } END_SECTION START_SECTION((template < typename FeatureMapType > void load(const String &filename, FeatureMapType &feature_map))) { KroenikFile f; FeatureMap fm; f.load(OPENMS_GET_TEST_DATA_PATH("KroenikFile_test_1.krf"), fm); TEST_EQUAL(fm.size(),3) ABORT_IF(fm.size()!=3) TEST_EQUAL(fm[0].getRT(), 63.2) TEST_REAL_SIMILAR(fm[0].getMZ(), 1002.11) TEST_EQUAL(fm[0].getIntensity(), 999999) TEST_EQUAL(fm[0].getCharge(), 1) TEST_EQUAL(String(fm[0].getMetaValue("AveragineModifications")), String("Carbamido")) TEST_EQUAL(fm[1].getRT(), 62.2) TEST_REAL_SIMILAR(fm[1].getMZ(), 252.057 ) TEST_EQUAL(fm[1].getIntensity(), 9999) TEST_EQUAL(fm[1].getCharge(), 2) TEST_EQUAL(String(fm[1].getMetaValue("AveragineModifications")), String("Carbamido2")) TEST_EXCEPTION(Exception::ParseError, f.load(OPENMS_GET_TEST_DATA_PATH("KroenikFile_test_2.krf"), fm));
iw->identifyCharge (*spec, map[0], 0, 0, 0, false); NOT_TESTABLE END_SECTION START_SECTION(void updateBoxStates(const MSExperiment< PeakType > &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1)) iw->updateBoxStates(map, INT_MAX, 0, 0); NOT_TESTABLE END_SECTION START_SECTION((virtual std::multimap<double, Box> getClosedBoxes ())) TEST_EQUAL (iw->getClosedBoxes().size(), 1) END_SECTION START_SECTION(FeatureMap< Feature > mapSeeds2Features(const MSExperiment< PeakType > &map, const UInt RT_votes_cutoff)) FeatureMap f = iw->mapSeeds2Features(map, 0); TEST_EQUAL (f.size(), 1) END_SECTION START_SECTION(void mergeFeatures(IsotopeWaveletTransform< PeakType > *later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff)) NOT_TESTABLE //only via CUDA END_SECTION START_SECTION(double getLinearInterpolation(const typename MSSpectrum< PeakType >::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum< PeakType >::const_iterator &right_iter)) TEST_EQUAL((int)(iw->getLinearInterpolation(map[0].begin(), 1420.02, (map[0].begin()+1))*10),5) END_SECTION START_SECTION(double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)) TEST_EQUAL(iw->getLinearInterpolation(1,1, 1.5, 2, 2), 1.5) END_SECTION
} END_SECTION START_SECTION(virtual ~SpecArrayFile()) { delete ptr; } END_SECTION START_SECTION((template < typename FeatureMapType > void load(const String &filename, FeatureMapType &feature_map))) { SpecArrayFile f; FeatureMap fm; f.load(OPENMS_GET_TEST_DATA_PATH("SpecArrayFile_test_1.peplist"), fm); TEST_EQUAL(fm.size(),2) ABORT_IF(fm.size()!=2) TEST_EQUAL(fm[0].getRT(), 60.1*60) TEST_REAL_SIMILAR(fm[0].getMZ(), 500.1) TEST_EQUAL(fm[0].getIntensity(), 4343534) TEST_EQUAL(fm[0].getCharge(), 5) TEST_EQUAL(double(fm[0].getMetaValue("s/n")), 3.2) TEST_EQUAL(fm[1].getRT(), 40.1*60) TEST_REAL_SIMILAR(fm[1].getMZ(), 700.1 ) TEST_EQUAL(fm[1].getIntensity(), 222432) TEST_EQUAL(fm[1].getCharge(), 3) TEST_EQUAL(double(fm[1].getMetaValue("s/n")), 2.2) TEST_EXCEPTION(Exception::ParseError, f.load(OPENMS_GET_TEST_DATA_PATH("SpecArrayFile_test_2.peplist"), fm));
sl.push_back("xml-stylesheet"); sl.push_back("<featureMap"); sl.push_back("<feature id"); fsc.setWhitelist(sl); //std::cout << "\n\n" << fsc.compareStrings("529090", "529091") << "\n\n\n"; START_SECTION((void run(std::vector< MassTrace > &, FeatureMap &, chromatograms &))) { FeatureFindingMetabo test_ffm; // run with non-default setting (C13 isotope distance) Param p = test_ffm.getParameters(); p.setValue("mz_scoring_13C", "true"); test_ffm.setParameters(p); test_ffm.run(splitted_mt, test_fm, chromatograms); TEST_EQUAL(test_fm.size(), 93); // run with default settings (from paper using charge+isotope# dependent distances) p.setValue("report_convex_hulls", "true"); p.setValue("mz_scoring_13C", "false"); test_ffm.setParameters(p); test_ffm.run(splitted_mt, test_fm, chromatograms); TEST_EQUAL(test_fm.size(), 91); // --> this gives less features, i.e. more isotope clusters (but the input data is simulated and highly weird -- should be replaced at some point) // test annotation of input String tmp_file; NEW_TMP_FILE(tmp_file); FeatureXMLFile().store(tmp_file, test_fm); TEST_EQUAL(fsc.compareFiles(tmp_file, OPENMS_GET_TEST_DATA_PATH("FeatureFindingMetabo_output1.featureXML")), true);