/**
  @brief Applies an RT transformation to every spectrum and every chromatogram
         data point of an experiment.

  The ranges of @p msexp are cleared before the retention times are rewritten
  and updated again afterwards.

  @param msexp Experiment whose spectra and chromatograms are modified in place
  @param trafo Transformation; its apply() is evaluated for each retention time
  @param store_original_rt If true, the untransformed RT of each spectrum is
         passed to storeOriginalRT_(), and the untransformed RTs of each
         chromatogram are stored as meta value "original_rt" unless a meta
         value of that name already exists on the chromatogram
*/
void MapAlignmentTransformer::transformRetentionTimes(
  MSExperiment<>& msexp, const TransformationDescription& trafo,
  bool store_original_rt)
{
  msexp.clearRanges();

  // Spectra: one retention time per spectrum.
  MSExperiment<>::iterator spectrum = msexp.begin();
  while (spectrum != msexp.end())
  {
    const double old_rt = spectrum->getRT();
    if (store_original_rt)
    {
      storeOriginalRT_(*spectrum, old_rt);
    }
    spectrum->setRT(trafo.apply(old_rt));
    ++spectrum;
  }

  // Chromatograms: one retention time per data point.
  for (Size chrom_index = 0; chrom_index < msexp.getNrChromatograms(); ++chrom_index)
  {
    MSChromatogram<ChromatogramPeak>& chrom = msexp.getChromatogram(chrom_index);
    const Size n_points = chrom.size();

    vector<double> untransformed;
    if (store_original_rt)
    {
      untransformed.reserve(n_points);
    }

    for (Size point = 0; point != n_points; ++point)
    {
      const double old_rt = chrom[point].getRT();
      if (store_original_rt)
      {
        untransformed.push_back(old_rt);
      }
      chrom[point].setRT(trafo.apply(old_rt));
    }

    // An already present "original_rt" entry is never overwritten.
    if (!store_original_rt || chrom.metaValueExists("original_rt"))
    {
      continue;
    }
    chrom.setMetaValue("original_rt", untransformed);
  }

  msexp.updateRanges();
}
/**
  @brief Applies an RT transformation to all spectra and chromatograms of a
         single peak map.

  Ranges are cleared up front and updated at the end. Chromatograms are copied,
  transformed and then written back with setChromatograms().

  @param msexp Experiment that is modified in place
  @param trafo Transformation; its apply() is evaluated for each retention time
*/
void MapAlignmentTransformer::transformSinglePeakMap(MSExperiment<>& msexp,
                                                     const TransformationDescription& trafo)
{
  msexp.clearRanges();

  // Spectra: one retention time per spectrum.
  MSExperiment<>::iterator spectrum = msexp.begin();
  while (spectrum != msexp.end())
  {
    const DoubleReal old_rt = spectrum->getRT();
    spectrum->setRT(trafo.apply(old_rt));
    ++spectrum;
  }

  // Chromatograms: work on copies, then replace the stored chromatograms.
  std::vector<MSChromatogram<ChromatogramPeak> > transformed;
  for (Size chrom_index = 0; chrom_index < msexp.getChromatograms().size(); ++chrom_index)
  {
    transformed.push_back(msexp.getChromatograms()[chrom_index]);
    MSChromatogram<ChromatogramPeak>& chrom = transformed.back();
    for (Size point = 0; point < chrom.size(); ++point)
    {
      const DoubleReal old_rt = chrom[point].getRT();
      chrom[point].setRT(trafo.apply(old_rt));
    }
  }
  msexp.setChromatograms(transformed);

  msexp.updateRanges();
}
void TOFCalibration::matchMasses_(MSExperiment<> & calib_peaks, std::vector<std::vector<unsigned int> > & monoiso_peaks, std::vector<unsigned int> & obs_masses, std::vector<double> & exp_masses, unsigned int idx) { for (unsigned int i = 0; i < monoiso_peaks[idx].size(); ++i) { for (unsigned int j = 0; j < exp_masses_.size(); ++j) { if (fabs(((calib_peaks.begin() + idx)->begin() + (monoiso_peaks[idx])[i])->getMZ() - exp_masses_[j]) < 1) { obs_masses.push_back((monoiso_peaks[idx])[i]); exp_masses.push_back(exp_masses_[j]); break; } } } #ifdef DEBUG_CALIBRATION std::cout << "\n\n---------\nmatching monoisotopic peaks\n"; for (unsigned int i = 0; i < obs_masses.size(); ++i) { std::cout << ((calib_peaks_ft_.begin() + idx)->begin() + obs_masses[i])->getMZ() << "\t" << exp_masses[i] << std::endl; } #endif }
// lists of peptide hits in "maps" will be sorted void MapAlignmentAlgorithmIdentification::getRetentionTimes_( MSExperiment<> & experiment, SeqToList & rt_data) { for (MSExperiment<>::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) { getRetentionTimes_(exp_it->getPeptideIdentifications(), rt_data); } // duplicates should not be possible -> no need to remove them }
int main() { // create a peak map containing 4 dummy spectra and peaks MSExperiment exp; // The following examples creates a MSExperiment containing four MSSpectrum instances. for (Size i = 0; i < 4; ++i) { MSSpectrum spectrum; spectrum.setRT(i); spectrum.setMSLevel(1); for (float mz = 500.0; mz <= 900; mz += 100.0) { Peak1D peak; peak.setMZ(mz + i); spectrum.push_back(peak); } exp.addSpectrum(spectrum); } // Iteration over the RT range (2,3) and the m/z range (603,802) and print the peak positions. for (auto it = exp.areaBegin(2.0, 3.0, 603.0, 802.0); it != exp.areaEnd(); ++it) { cout << it.getRT() << " - " << it->getMZ() << endl; } // Iteration over all peaks in the experiment. // Output: RT, m/z, and intensity // Note that the retention time is stored in the spectrum (not in the peak object) for (auto s_it = exp.begin(); s_it != exp.end(); ++s_it) { for (auto p_it = s_it->begin(); p_it != s_it->end(); ++p_it) { cout << s_it->getRT() << " - " << p_it->getMZ() << " " << p_it->getIntensity() << endl; } } // We could store the spectra to a mzML file with: // MzMLFile mzml; // mzml.store(filename, exp); // And load it with // mzml.load(filename, exp); // If we wanted to load only the MS2 spectra we could speed up reading by setting: // mzml.getOptions().addMSLevel(2); // before executing: mzml.load(filename, exp); return 0; } //end of main
void SeedListGenerator::generateSeedList(const MSExperiment<>& experiment, SeedList& seeds) { seeds.clear(); for (MSExperiment<>::ConstIterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it) { if (exp_it->getMSLevel() == 2) // MS2 spectrum -> look for precursor { MSExperiment<>::ConstIterator prec_it = experiment.getPrecursorSpectrum(exp_it); const vector<Precursor>& precursors = exp_it->getPrecursors(); DPosition<2> point(prec_it->getRT(), precursors[0].getMZ()); seeds.push_back(point); } } }
int main(int argc, const char** argv) { if (argc < 2) return 1; // the path to the data should be given on the command line String tutorial_data_path(argv[1]); MSExperiment spectra; MzMLFile f; // load mzML from code examples folder f.load(tutorial_data_path + "/data/Tutorial_GaussFilter.mzML", spectra); // iterate over map and output MS2 precursor information for (auto s_it = spectra.begin(); s_it != spectra.end(); ++s_it) { // we are only interested in MS2 spectra so we skip all other levels if (s_it->getMSLevel() != 2) continue; // get a reference to the precursor information const MSSpectrum& spectrum = *s_it; const vector<Precursor>& precursors = spectrum.getPrecursors(); // size check & throw exception if needed if (precursors.empty()) throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, precursors.size()); // get m/z and intensity of precursor double precursor_mz = precursors[0].getMZ(); float precursor_int = precursors[0].getIntensity(); // retrieve the precursor spectrum (the most recent MS1 spectrum) PeakMap::ConstIterator precursor_spectrum = spectra.getPrecursorSpectrum(s_it); double precursor_rt = precursor_spectrum->getRT(); // output precursor information std::cout << " precusor m/z: " << precursor_mz << " intensity: " << precursor_int << " retention time (sec.): " << precursor_rt << std::endl; } return 0; } // end of main
/**
  @brief Marks the precursor position of every MS2 spectrum in an image.

  The precursor m/z (first precursor entry of the MS2 spectrum) and the RT of
  its precursor spectrum are scaled linearly from the RT and m/z ranges of
  @p exp to the pixel dimensions of @p image; the resulting pixel is passed
  to addPoint_().

  MS2 spectra without a precursor entry, or without a precursor spectrum in
  @p exp, are skipped because no position can be computed for them.

  @param exp Experiment providing the spectra and the RT / m/z ranges
  @param image Image to draw into
  @param transpose If true, RT runs along x and m/z along y; otherwise m/z
         runs along x and RT along y
  @param color Colour forwarded to addPoint_()
  @param size Marker size forwarded to addPoint_()
*/
void markMS2Locations_(MSExperiment<> & exp, QImage & image, bool transpose,
                       QColor color, Size size)
{
  // pixels per RT / m/z unit along each image axis
  double xcoef = image.width(), ycoef = image.height();
  if (transpose)
  {
    xcoef /= exp.getMaxRT() - exp.getMinRT();
    ycoef /= exp.getMaxMZ() - exp.getMinMZ();
  }
  else
  {
    xcoef /= exp.getMaxMZ() - exp.getMinMZ();
    ycoef /= exp.getMaxRT() - exp.getMinRT();
  }

  // getPrecursorSpectrum() reports "not found" via the past-the-end iterator
  const MSExperiment<>::ConstIterator exp_end = exp.end();

  for (MSExperiment<>::Iterator spec_iter = exp.begin();
       spec_iter != exp.end(); ++spec_iter)
  {
    if (spec_iter->getMSLevel() != 2)
    {
      continue;
    }
    if (spec_iter->getPrecursors().empty())
    {
      continue; // no precursor m/z to mark
    }
    const MSExperiment<>::ConstIterator prec_iter = exp.getPrecursorSpectrum(spec_iter);
    if (prec_iter == exp_end)
    {
      continue; // no precursor RT to mark
    }

    double mz = spec_iter->getPrecursors()[0].getMZ();
    double rt = prec_iter->getRT();
    int x, y;
    if (transpose)
    {
      x = int(xcoef * (rt - exp.getMinRT()));
      y = int(ycoef * (exp.getMaxMZ() - mz));
    }
    else
    {
      x = int(xcoef * (mz - exp.getMinMZ()));
      y = int(ycoef * (exp.getMaxRT() - rt));
    }
    addPoint_(x, y, image, color, size); //mark MS2
  }
}
/**
  @brief Replaces the position of every peak in @p calib_spectra by a value
         computed from its current position and the stored conversion constants.

  The current peak position (read via getMZ()) is used as the input value
  "time" of the conversion. If ml3s_ is empty the two point formula (ml1, ml2)
  is applied, otherwise the three point formula (ml1, ml2, ml3).

  If ml1s_ holds exactly one entry, the constants at index 0 are used for all
  spectra; otherwise the constants at the index of the spectrum are used.

  @param calib_spectra Spectra whose peak positions are rewritten in place
*/
void TOFCalibration::applyTOFConversion_(MSExperiment<>& calib_spectra)
{
  const bool two_point = ml3s_.empty();
  const bool shared_constants = (ml1s_.size() == 1);

  unsigned int scan_index = 0;
  for (MSExperiment<>::iterator scan = calib_spectra.begin();
       scan != calib_spectra.end(); ++scan, ++scan_index)
  {
    // pick the constants belonging to this scan
    const unsigned int k = shared_constants ? 0 : scan_index;
    const double ml1 = ml1s_[k];
    const double ml2 = ml2s_[k];

    if (two_point)
    {
      for (MSExperiment<>::SpectrumType::iterator peak = scan->begin();
           peak != scan->end(); ++peak)
      {
        const double time = peak->getMZ();
        peak->setPos(ml1 / 1E12 * (time * 1000 - ml2));
      }
    }
    else
    {
      const double ml3 = ml3s_[k];
      for (MSExperiment<>::SpectrumType::iterator peak = scan->begin();
           peak != scan->end(); ++peak)
      {
        const double time = peak->getMZ();
        peak->setPos((-ml2 - (0.1E7 * (-5E5 + sqrt(0.25E12 - ml1 * ml2 * ml3 + ml1 * ml3 * time))) / (ml1 * ml3) + time) / ml3);
      }
    }
  }
}
void TOFCalibration::getMonoisotopicPeaks_(MSExperiment<> & calib_peaks, std::vector<std::vector<unsigned int> > & monoiso_peaks) { MSExperiment<>::iterator spec_iter = calib_peaks.begin(); MSExperiment<>::SpectrumType::iterator peak_iter, help_iter; #ifdef DEBUG_CALIBRATION spec_iter = calib_peaks.begin(); std::cout << "\n\nbefore---------\n\n"; // iterate through all spectra for (; spec_iter != calib_peaks.end(); ++spec_iter) { peak_iter = spec_iter->begin(); // go through current scan for (; peak_iter != spec_iter->end(); ++peak_iter) { std::cout << peak_iter->getMZ() << std::endl; } } #endif spec_iter = calib_peaks.begin(); // iterate through all spectra for (; spec_iter != calib_peaks.end(); ++spec_iter) { peak_iter = spec_iter->begin(); help_iter = peak_iter; std::vector<unsigned int> vec; // go through current scan while (peak_iter < spec_iter->end()) { while (peak_iter + 1 < spec_iter->end() && ((peak_iter + 1)->getMZ() - peak_iter->getMZ() < 1.2)) { ++peak_iter; } vec.push_back(distance(spec_iter->begin(), help_iter)); help_iter = peak_iter + 1; ++peak_iter; } monoiso_peaks.push_back(vec); } #ifdef DEBUG_CALIBRATION std::cout << "\n\nafter---------\n\n"; for (unsigned int i = 0; i < monoiso_peaks.size(); ++i) { for (unsigned int j = 0; j < monoiso_peaks[i].size(); ++j) { std::cout << i << "\t" << ((calib_peaks.begin() + i)->begin() + (monoiso_peaks[i])[j])->getMZ() << std::endl; } std::cout << "--------------\n"; } std::cout << "--------------\n\n\n"; #endif }
/**
  @brief Tool entry point: renders the MS1 peaks of an mzML file into an image.

  Steps performed, in order:
  - read options; if 'out_type' is empty the image format is derived from the
    suffix of 'out' and validated against out_formats_
  - load the experiment and update its ranges (called with argument 1)
  - resample all MS1 peaks onto a rows x cols grid by bilinear interpolation
    (optionally transposed)
  - map grid values to colours via a gradient (optionally on log scale)
  - optionally mark MS2 precursors and features
  - save the image

  @return ILLEGAL_PARAMETERS if the output format cannot be determined or the
          image would have no rows/columns, CANNOT_WRITE_OUTPUT_FILE if saving
          the image fails, EXECUTION_OK otherwise
*/
ExitCodes main_(int, const char **)
{
  //----------------------------------------------------------------
  // load data
  //----------------------------------------------------------------
  String in = getStringOption_("in");
  String in_featureXML = getStringOption_("in_featureXML");
  String out = getStringOption_("out");
  String format = getStringOption_("out_type");
  if (format.trim() == "") // get from filename
  {
    try
    {
      format = out.suffix('.');
    }
    catch (Exception::ElementNotFound & /*e*/)
    {
      format = "nosuffix";
    }
    StringListUtils::toUpper(out_formats_);
    if (!ListUtils::contains(out_formats_, format.toUpper()))
    {
      LOG_ERROR << "No explicit image output format was provided via 'out_type', and the suffix ('" << format << "') does not resemble a valid type. Please fix one of them." << std::endl;
      return ILLEGAL_PARAMETERS;
    }
  }

  MSExperiment<> exp;
  MzMLFile f;
  f.setLogType(log_type_);
  f.load(in, exp);
  exp.updateRanges(1);

  // a height of 0 means: one row per spectrum
  SignedSize rows = getIntOption_("height");
  if (rows == 0)
  {
    rows = exp.size();
  }
  if (rows <= 0)
  {
    writeLog_("Error: Zero rows is not possible.");
    return ILLEGAL_PARAMETERS;
  }

  // a width of 0 means: one column per m/z unit (rounded up)
  SignedSize cols = getIntOption_("width");
  if (cols == 0)
  {
    cols = UInt(ceil(exp.getMaxMZ() - exp.getMinMZ()));
  }
  if (cols <= 0)
  {
    writeLog_("Error: Zero columns is not possible.");
    return ILLEGAL_PARAMETERS;
  }

  //----------------------------------------------------------------
  //Do the actual resampling
  BilinearInterpolation<double, double> bilip;
  bilip.getData().resize(rows, cols);

  if (!getFlag_("transpose"))
  {
    // scans run bottom-up:
    bilip.setMapping_0(0, exp.getMaxRT(), rows - 1, exp.getMinRT());
    // peaks run left-right:
    bilip.setMapping_1(0, exp.getMinMZ(), cols - 1, exp.getMaxMZ());

    for (MSExperiment<>::Iterator spec_iter = exp.begin();
         spec_iter != exp.end(); ++spec_iter)
    {
      if (spec_iter->getMSLevel() != 1) continue;
      for (MSExperiment<>::SpectrumType::ConstIterator peak1_iter = spec_iter->begin();
           peak1_iter != spec_iter->end(); ++peak1_iter)
      {
        bilip.addValue(spec_iter->getRT(), peak1_iter->getMZ(), peak1_iter->getIntensity());
      }
    }
  }
  else // transpose
  {
    // spectra run bottom-up:
    bilip.setMapping_0(0, exp.getMaxMZ(), rows - 1, exp.getMinMZ());
    // scans run left-right:
    bilip.setMapping_1(0, exp.getMinRT(), cols - 1, exp.getMaxRT());

    for (MSExperiment<>::Iterator spec_iter = exp.begin();
         spec_iter != exp.end(); ++spec_iter)
    {
      if (spec_iter->getMSLevel() != 1) continue;
      for (MSExperiment<>::SpectrumType::ConstIterator peak1_iter = spec_iter->begin();
           peak1_iter != spec_iter->end(); ++peak1_iter)
      {
        bilip.addValue(peak1_iter->getMZ(), spec_iter->getRT(), peak1_iter->getIntensity());
      }
    }
  }

  //----------------------------------------------------------------
  //create and store image
  const int scans = static_cast<int>(bilip.getData().sizePair().first);
  const int peaks = static_cast<int>(bilip.getData().sizePair().second);

  MultiGradient gradient;
  String gradient_str = getStringOption_("gradient");
  if (gradient_str != "")
  {
    gradient.fromString(String("Linear|") + gradient_str);
  }
  else
  {
    gradient.fromString("Linear|0,#FFFFFF;2,#FFFF00;11,#FFAA00;32,#FF0000;55,#AA00FF;78,#5500FF;100,#000000");
  }

  bool use_log = getFlag_("log_intensity");
  writeDebug_("log_intensity: " + String(use_log), 1);

  QImage image(peaks, scans, QImage::Format_RGB32);
  string s = getStringOption_("background_color");
  QColor background_color(s.c_str());

  string feature_color_string = getStringOption_("feature_color");
  QColor feature_color(feature_color_string.c_str());

  {
    // Scoped painter: it is destroyed at the end of this block, i.e. before
    // the pixels are written directly below, and cannot leak.
    QPainter painter(&image);
    painter.setPen(background_color);
    painter.fillRect(0, 0, peaks, scans, Qt::SolidPattern);
  }

  // scaling: 'factor' is the intensity that maps to gradient position 100
  double factor = getDoubleOption_("max_intensity");
  if (factor == 0)
  {
    factor = (*std::max_element(bilip.getData().begin(), bilip.getData().end()));
  }
  // logarithmize max. intensity as well:
  if (use_log) factor = std::log(factor);

  factor /= 100.0;

  // every pixel of the image is assigned here: either a gradient colour or
  // the background colour
  for (int i = 0; i < scans; ++i)
  {
    for (int j = 0; j < peaks; ++j)
    {
      double value = bilip.getData().getValue(i, j);
      if (use_log) value = std::log(value);
      if (value > 1e-4)
      {
        image.setPixel(j, i, gradient.interpolatedColorAt(value / factor).rgb());
      }
      else
      {
        image.setPixel(j, i, background_color.rgb());
      }
    }
  }

  if (getFlag_("precursors"))
  {
    markMS2Locations_(exp, image, getFlag_("transpose"),
                      getStringOption_("precursor_color").toQString(),
                      Size(getIntOption_("precursor_size")));
  }

  if (!in_featureXML.empty())
  {
    FeatureMap feature_map;
    FeatureXMLFile ff;
    ff.load(in_featureXML, feature_map);
    markFeatureLocations_(feature_map, exp, image, getFlag_("transpose"), feature_color);
  }

  if (image.save(out.toQString(), format.c_str())) return EXECUTION_OK;
  else return CANNOT_WRITE_OUTPUT_FILE;
}
/**
  @brief Tool entry point of the SILAC analysis.

  Steps performed, in order:
  - parameter handling (handleParameters_* calls) and a consistency check
  - initialisation of the SILACAnalyzer with the tool parameters
  - loading the input experiment, removing spectra via an MS level filter and
    sorting the spectra
  - optional registration of the experiment for mzQuantML output
  - peak width estimation
  - filtering of the input data, or loading of stored filter results
  - clustering
  - writing of every requested output (debug CSV, consensus map, mzQuantML,
    cluster consensus map, feature map)

  Names such as selected_labels, in, out, out_mzq, out_debug, out_clusters,
  out_features, in_filters and out_filters are not declared in this function
  (presumably members filled by the handleParameters* calls -- TODO confirm).

  @return ILLEGAL_PARAMETERS if 'out' is requested without labels,
          INCOMPATIBLE_INPUT_DATA if the peak width cannot be estimated,
          EXECUTION_OK otherwise
*/
ExitCodes main_(int, const char **)
{
  // data to be passed through the algorithm
  vector<vector<SILACPattern> > data;
  MSQuantifications msq;
  // NOTE(review): the Clustering pointers stored here are never released in
  // this function; ownership is not visible from here -- confirm.
  vector<Clustering *> cluster_data;

  //
  // Parameter handling
  //
  map<String, DoubleReal> label_identifiers; // list defining the mass shifts of each label (e.g. "Arg6" => 6.0201290268)
  handleParameters_sample();
  handleParameters_algorithm();
  handleParameters_labels(label_identifiers);
  handleParameters();

  if (selected_labels.empty() && !out.empty()) // incompatible parameters
  {
    writeLog_("Error: The 'out' parameter cannot be used without a label (parameter 'sample:labels'). Use 'out_features' instead.");
    return ILLEGAL_PARAMETERS;
  }

  //
  // Initializing the SILACAnalyzer with our parameters
  //
  SILACAnalyzer analyzer;
  analyzer.setLogType(log_type_);
  analyzer.initialize(
    // section "sample"
    selected_labels,
    charge_min,
    charge_max,
    missed_cleavages,
    isotopes_per_peptide_min,
    isotopes_per_peptide_max,
    // section "algorithm"
    rt_threshold,
    rt_min,
    intensity_cutoff,
    intensity_correlation,
    model_deviation,
    allow_missing_peaks,
    // labels
    label_identifiers);

  //--------------------------------------------------
  // loading input from .mzML
  //--------------------------------------------------

  MzMLFile file;
  MSExperiment<Peak1D> exp;

  // only read MS1 spectra ...
  /*
  std::vector<int> levels;
  levels.push_back(1);
  file.getOptions().setMSLevels(levels);
  */
  LOG_DEBUG << "Loading input..." << endl;
  file.setLogType(log_type_);
  file.load(in, exp);

  // set size of input map
  exp.updateRanges();

  // extract level 1 spectra
  // (erase-remove idiom; presumably the 'true' argument inverts the level
  // filter so that everything except level 1 is removed -- TODO confirm)
  exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<MSExperiment<Peak1D>::SpectrumType>(IntList::create("1"), true)), exp.end());

  // sort according to RT and MZ
  exp.sortSpectra();

  // The experiment is only registered with the quantification object when
  // mzQuantML output was requested.
  if (out_mzq != "")
  {
    vector<vector<String> > SILAClabels = analyzer.getSILAClabels(); // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"

    std::vector<std::vector<std::pair<String, DoubleReal> > > labels;
    // add "none" label (mass shift 0) as first entry
    labels.push_back(std::vector<std::pair<String, DoubleReal> >(1, std::make_pair<String, DoubleReal>(String("none"), DoubleReal(0))));
    for (Size i = 0; i < SILAClabels.size(); ++i) // SILAClabels MUST be in weight order!!!
    {
      std::vector<std::pair<String, DoubleReal> > one_label;
      for (UInt j = 0; j < SILAClabels[i].size(); ++j)
      {
        one_label.push_back(*(label_identifiers.find(SILAClabels[i][j]))); // this dereferencing would break if all SILAClabels had not been checked before!
      }
      labels.push_back(one_label);
    }
    msq.registerExperiment(exp, labels); // add assays
    msq.assignUIDs();
  }
  MSQuantifications::QUANT_TYPES quant_type = MSQuantifications::MS1LABEL;
  msq.setAnalysisSummaryQuantType(quant_type); // add analysis_summary_

  //--------------------------------------------------
  // estimate peak width
  //--------------------------------------------------

  LOG_DEBUG << "Estimating peak width..." << endl;
  PeakWidthEstimator::Result peak_width;
  try
  {
    peak_width = analyzer.estimatePeakWidth(exp);
  }
  catch (Exception::InvalidSize &)
  {
    writeLog_("Error: Unable to estimate peak width of input data.");
    return INCOMPATIBLE_INPUT_DATA;
  }

  // Either filter the input data now, or load filter results that were
  // stored by an earlier run.
  if (in_filters == "")
  {
    //--------------------------------------------------
    // filter input data
    //--------------------------------------------------

    LOG_DEBUG << "Filtering input data..." << endl;
    analyzer.filterData(exp, peak_width, data);

    //--------------------------------------------------
    // store filter results
    //--------------------------------------------------

    if (out_filters != "")
    {
      LOG_DEBUG << "Storing filtering results..." << endl;
      ConsensusMap map;
      for (std::vector<std::vector<SILACPattern> >::const_iterator it = data.begin(); it != data.end(); ++it)
      {
        analyzer.generateFilterConsensusByPattern(map, *it);
      }
      analyzer.writeConsensus(out_filters, map);
    }
  }
  else
  {
    //--------------------------------------------------
    // load filter results
    //--------------------------------------------------

    LOG_DEBUG << "Loading filtering results..." << endl;
    ConsensusMap map;
    analyzer.readConsensus(in_filters, map);
    analyzer.readFilterConsensusByPattern(map, data);
  }

  //--------------------------------------------------
  // clustering
  //--------------------------------------------------

  LOG_DEBUG << "Clustering data..." << endl;
  analyzer.clusterData(exp, peak_width, cluster_data, data);

  //--------------------------------------------------------------
  // write output
  //--------------------------------------------------------------

  if (out_debug != "")
  {
    LOG_DEBUG << "Writing debug output file..." << endl;

    // Note: this local stream is named 'out'; within this scope it hides the
    // 'out' that is tested before and after this block.
    std::ofstream out((out_debug + ".clusters.csv").c_str());

    // NOTE(review): massShifts[0] is accessed without checking that
    // massShifts is non-empty -- confirm this is guaranteed.
    vector<vector<DoubleReal> > massShifts = analyzer.getMassShifts(); // list of mass shifts

    // generate header
    out
    << std::fixed << std::setprecision(8)
    << "ID,RT,MZ_PEAK,CHARGE";
    for (UInt i = 1; i <= massShifts[0].size(); ++i)
    {
      out << ",DELTA_MASS_" << i + 1;
    }
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",INT_PEAK_" << i + 1 << '_' << j;
      }
    }
    out << ",MZ_RAW";
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",INT_RAW_" << i + 1 << '_' << j;
      }
    }
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",MZ_RAW_" << i + 1 << '_' << j;
      }
    }
    out << '\n';

    // write data
    // (one counter is shared by all generateClusterDebug() calls)
    UInt cluster_id = 0;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterDebug(out, **it, cluster_id);
    }
  }

  if (out != "")
  {
    LOG_DEBUG << "Generating output consensus map..." << endl;
    ConsensusMap map;

    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterConsensusByCluster(map, **it);
    }

    LOG_DEBUG << "Adding meta data..." << endl;
    // XXX: Need a map per mass shift
    ConsensusMap::FileDescriptions& desc = map.getFileDescriptions();
    Size id = 0;
    for (ConsensusMap::FileDescriptions::iterator it = desc.begin(); it != desc.end(); ++it)
    {
      if (test_mode_) it->second.filename = in; // skip path, since it is not cross-platform and complicates verification
      else it->second.filename = File::basename(in);
      // Write correct label
      // (this would crash if used without a label!)
      if (id > 0) it->second.label = StringList(analyzer.getSILAClabels()[id - 1]).concatenate(""); // skip first round (empty label is not listed)
      ++id;
    }

    std::set<DataProcessing::ProcessingAction> actions;
    actions.insert(DataProcessing::DATA_PROCESSING);
    actions.insert(DataProcessing::PEAK_PICKING);
    actions.insert(DataProcessing::FILTERING);
    actions.insert(DataProcessing::QUANTITATION);

    addDataProcessing_(map, getProcessingInfo_(actions));

    analyzer.writeConsensus(out, map);
    if (out_mzq != "")
    {
      LOG_DEBUG << "Generating output mzQuantML file..." << endl;
      ConsensusMap numap(map);
      // calc. ratios
      for (ConsensusMap::iterator cit = numap.begin(); cit != numap.end(); ++cit)
      {
        //~ make ratio templates
        // one ratio per assay except the first; the first assay is the
        // numerator of every ratio
        std::vector<ConsensusFeature::Ratio> rts;
        for (std::vector<MSQuantifications::Assay>::const_iterator ait = msq.getAssays().begin() + 1; ait != msq.getAssays().end(); ++ait)
        {
          ConsensusFeature::Ratio r;
          r.numerator_ref_ = String(msq.getAssays().begin()->uid_);
          r.denominator_ref_ = String(ait->uid_);
          r.description_.push_back("Simple ratio calc");
          r.description_.push_back("light to medium/.../heavy");
          //~ "<cvParam cvRef=\"PSI-MS\" accession=\"MS:1001132\" name=\"peptide ratio\"/>"
          rts.push_back(r);
        }

        const ConsensusFeature::HandleSetType& feature_handles = cit->getFeatures();
        if (feature_handles.size() > 1)
        {
          std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fit = feature_handles.begin(); // this is unlabeled
          fit++;
          for (; fit != feature_handles.end(); ++fit)
          {
            // position of the handle within the set selects the ratio template
            Size ri = std::distance(feature_handles.begin(), fit);
            rts[ri - 1].ratio_value_ = feature_handles.begin()->getIntensity() / fit->getIntensity(); // a proper SILACAnalyzer algorithm should never have 0-intensities, so no division by zero ...
          }
        }

        cit->setRatios(rts);
      }
      msq.addConsensusMap(numap); // add SILACAnalyzer result

      //~ msq.addFeatureMap();//add SILACAnalyzer evidence trail as soon as it is clear what is really contained in the feature map
      //~ add AuditCollection - no such concept in TOPPTools yet
      analyzer.writeMzQuantML(out_mzq, msq);
    }
  }

  if (out_clusters != "")
  {
    LOG_DEBUG << "Generating cluster output file..." << endl;
    ConsensusMap map;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      // NOTE(review): unlike in the debug output above, the counter is
      // re-initialised to 0 for every Clustering here -- confirm whether
      // cluster ids are meant to restart per Clustering.
      UInt cluster_id = 0;
      analyzer.generateClusterConsensusByPattern(map, **it, cluster_id);
    }

    ConsensusMap::FileDescription & desc = map.getFileDescriptions()[0];
    desc.filename = in;
    desc.label = "Cluster";

    analyzer.writeConsensus(out_clusters, map);
  }

  if (out_features != "")
  {
    LOG_DEBUG << "Generating output feature map..." << endl;
    FeatureMap<> map;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterFeatureByCluster(map, **it);
    }

    analyzer.writeFeatures(out_features, map);
  }

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: separates identifications from the data they are
         attached to.

  Depending on the type of the input file (mzML, featureXML, or anything else,
  which is treated as consensusXML), all peptide identifications are moved out
  of the spectra / features / consensus features and all protein
  identifications are moved out of the map. The stripped map is stored to
  'out' (if given) and the identifications to 'id_out' (if given).

  @return EXECUTION_OK
  @throws Exception::RequiredParameterNotGiven if neither 'out' nor 'id_out' is set
*/
ExitCodes main_(int, const char **)
{
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  String id_out = getStringOption_("id_out");

  const bool store_data = !out.empty();
  const bool store_ids = !id_out.empty();
  if (!store_data && !store_ids)
  {
    throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, __PRETTY_FUNCTION__, "out/id_out");
  }

  vector<ProteinIdentification> proteins;
  vector<PeptideIdentification> peptides;

  FileTypes::Type in_type = FileHandler::getType(in);

  switch (in_type)
  {
    case FileTypes::MZML:
    {
      MSExperiment<> experiment;
      MzMLFile().load(in, experiment);
      // what about unassigned peptide IDs?
      MSExperiment<>::Iterator spectrum = experiment.begin();
      while (spectrum != experiment.end())
      {
        vector<PeptideIdentification>& attached = spectrum->getPeptideIdentifications();
        peptides.insert(peptides.end(), attached.begin(), attached.end());
        attached.clear();
        ++spectrum;
      }
      experiment.getProteinIdentifications().swap(proteins);
      if (store_data)
      {
        addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::FILTERING));
        MzMLFile().store(out, experiment);
      }
      break;
    }

    case FileTypes::FEATUREXML:
    {
      FeatureMap features;
      FeatureXMLFile().load(in, features);
      // start with the unassigned IDs, then append the per-feature IDs
      features.getUnassignedPeptideIdentifications().swap(peptides);
      FeatureMap::Iterator feature = features.begin();
      while (feature != features.end())
      {
        vector<PeptideIdentification>& attached = feature->getPeptideIdentifications();
        peptides.insert(peptides.end(), attached.begin(), attached.end());
        attached.clear();
        ++feature;
      }
      features.getProteinIdentifications().swap(proteins);
      if (store_data)
      {
        addDataProcessing_(features, getProcessingInfo_(DataProcessing::FILTERING));
        FeatureXMLFile().store(out, features);
      }
      break;
    }

    default: // consensusXML
    {
      ConsensusMap consensus;
      ConsensusXMLFile().load(in, consensus);
      // start with the unassigned IDs, then append the per-feature IDs
      consensus.getUnassignedPeptideIdentifications().swap(peptides);
      ConsensusMap::Iterator cons_feature = consensus.begin();
      while (cons_feature != consensus.end())
      {
        vector<PeptideIdentification>& attached = cons_feature->getPeptideIdentifications();
        peptides.insert(peptides.end(), attached.begin(), attached.end());
        attached.clear();
        ++cons_feature;
      }
      consensus.getProteinIdentifications().swap(proteins);
      if (store_data)
      {
        addDataProcessing_(consensus, getProcessingInfo_(DataProcessing::FILTERING));
        ConsensusXMLFile().store(out, consensus);
      }
      break;
    }
  }

  if (store_ids)
  {
    // IDMapper can match a peptide ID to several overlapping features,
    // resulting in duplicates; this shouldn't be the case for peak data
    if (in_type != FileTypes::MZML)
    {
      removeDuplicates_(peptides);
    }
    IdXMLFile().store(id_out, proteins, peptides);
  }

  return EXECUTION_OK;
}
void IsobaricChannelExtractor::extractChannels(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map) { if (ms_exp_data.empty()) { LOG_WARN << "The given file does not contain any conventional peak data, but might" " contain chromatograms. This tool currently cannot handle them, sorry.\n"; throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!"); } // clear the output map consensus_map.clear(false); consensus_map.setExperimentType("labeled_MS2"); // create predicate for spectrum checking LOG_INFO << "Selecting scans with activation mode: " << (selected_activation_ == "" ? "any" : selected_activation_) << "\n"; HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(StringList::create(selected_activation_)); // now we have picked data // --> assign peaks to channels UInt64 element_index(0); // remember the current precusor spectrum MSExperiment<Peak1D>::ConstIterator prec_spec = ms_exp_data.end(); for (MSExperiment<Peak1D>::ConstIterator it = ms_exp_data.begin(); it != ms_exp_data.end(); ++it) { // remember the last MS1 spectra as we assume it to be the precursor spectrum if (it->getMSLevel() == 1) prec_spec = it; if (selected_activation_ == "" || activation_predicate(*it)) { // check if precursor is available if (it->getPrecursors().empty()) { throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + it->getNativeID() + " with RT " + String(it->getRT())); } // check precursor constraints if (!isValidPrecursor_(it->getPrecursors()[0])) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor doesn't fulfill all constraints." << std::endl; continue; } // check precursor purity if we have a valid precursor .. 
if (prec_spec != ms_exp_data.end()) { const DoubleReal purity = computePrecursorPurity_(it, prec_spec); if (purity < min_precursor_purity_) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold. [purity = " << purity << "]" << std::endl; continue; } } else { LOG_INFO << "No precursor available for spectrum: " << it->getNativeID() << std::endl; } if (!(prec_spec == ms_exp_data.end()) && computePrecursorPurity_(it, prec_spec) < min_precursor_purity_) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold." << std::endl; continue; } // store RT&MZ of parent ion as centroid of ConsensusFeature ConsensusFeature cf; cf.setUniqueId(); cf.setRT(it->getRT()); cf.setMZ(it->getPrecursors()[0].getMZ()); Peak2D channel_value; channel_value.setRT(it->getRT()); // for each each channel UInt64 map_index = 0; Peak2D::IntensityType overall_intensity = 0; for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin(); cl_it != quant_method_->getChannelInformation().end(); ++cl_it) { // set mz-position of channel channel_value.setMZ(cl_it->center); // reset intensity channel_value.setIntensity(0); // as every evaluation requires time, we cache the MZEnd iterator const MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_end = it->MZEnd(cl_it->center + reporter_mass_shift_); // add up all signals for (MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_it = it->MZBegin(cl_it->center - reporter_mass_shift_); mz_it != mz_end; ++mz_it) { channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity()); } // discard contribution of this channel as it is below the required intensity threshold if (channel_value.getIntensity() < min_reporter_intensity_) { channel_value.setIntensity(0); } overall_intensity += channel_value.getIntensity(); // add channel to ConsensusFeature cf.insert(map_index++, channel_value, element_index); } 
// ! channel_iterator // check if we keep this feature or if it contains low-intensity quantifications if (remove_low_intensity_quantifications_ && hasLowIntensityReporter_(cf)) { continue; } // check featureHandles are not empty if (overall_intensity == 0) { cf.setMetaValue("all_empty", String("true")); } cf.setIntensity(overall_intensity); consensus_map.push_back(cf); // the tandem-scan in the order they appear in the experiment ++element_index; } } // ! Experiment iterator /// add meta information to the map registerChannelsInOutputMap_(consensus_map); }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // general variables and data //------------------------------------------------------------- FileHandler fh; vector<PeptideIdentification> peptide_identifications; vector<ProteinIdentification> protein_identifications; //------------------------------------------------------------- // reading input //------------------------------------------------------------- const String in = getStringOption_("in"); ProgressLogger logger; logger.setLogType(ProgressLogger::CMD); logger.startProgress(0, 1, "Loading..."); if (File::isDirectory(in)) { const String in_directory = File::absolutePath(in).ensureLastChar('/'); const String mz_file = getStringOption_("mz_file"); const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide"); UInt i = 0; FileHandler fh; FileTypes::Type type; MSExperiment<Peak1D> msexperiment; // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;) However, now String::toInt() might throw. map<Int, float> num_and_rt; vector<String> NativeID; // The mz-File (if given) if (!mz_file.empty()) { type = fh.getTypeByFileName(mz_file); fh.loadExperiment(mz_file, msexperiment, type); for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it) { String(spectra_it->getNativeID()).split('=', NativeID); try { num_and_rt[NativeID[1].toInt()] = spectra_it->getRT(); // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01 } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. 
'") + e.getMessage()); } } } // Get list of the actual Sequest .out-Files StringList in_files; if (!File::fileList(in_directory, String("*.out"), in_files)) { writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!"); } // Now get to work ... for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it) { vector<PeptideIdentification> peptide_ids_seq; ProteinIdentification protein_id_seq; vector<double> pvalues_seq; vector<String> in_file_vec; SequestOutfile sequest_outfile; writeDebug_(String("Reading file ") + *in_files_it, 3); try { sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide); in_files_it->split('.', in_file_vec); for (Size j = 0; j < peptide_ids_seq.size(); ++j) { // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files. peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i); Int scan_number = 0; if (!mz_file.empty()) { try { scan_number = in_file_vec[2].toInt(); peptide_ids_seq[j].setRT(num_and_rt[scan_number]); } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage()); } catch (exception& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.what()); } //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? 
semantics of mz const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge(); peptide_ids_seq[j].setMZ(real_mz); } writeDebug_(String("scan: ") + String(scan_number) + String(" RT: ") + String(peptide_ids_seq[j].getRT()) + " MZ: " + String(peptide_ids_seq[j].getMZ()) + " Ident: " + peptide_ids_seq[j].getIdentifier(), 4); peptide_identifications.push_back(peptide_ids_seq[j]); } protein_id_seq.setIdentifier(*in_files_it + "_" + i); protein_identifications.push_back(protein_id_seq); ++i; } catch (Exception::ParseError& pe) { writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")"); throw; } catch (...) { writeLog_(String("Error reading file: ") + *in_files_it); throw; } } writeDebug_("All files processed.", 3); } // ! directory else { FileTypes::Type in_type = fh.getType(in); if (in_type == FileTypes::PEPXML) { String exp_name = getStringOption_("mz_file"); String orig_name = getStringOption_("mz_name"); bool use_precursor_data = getFlag_("use_precursor_data"); if (exp_name.empty()) { PepXMLFile().load(in, protein_identifications, peptide_identifications, orig_name); } else { MSExperiment<> exp; fh.loadExperiment(exp_name, exp); if (!orig_name.empty()) { exp_name = orig_name; } PepXMLFile().load(in, protein_identifications, peptide_identifications, exp_name, exp, use_precursor_data); } } else if (in_type == FileTypes::IDXML) { IdXMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::MZIDENTML) { LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." 
<< endl; MzIdentMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::PROTXML) { protein_identifications.resize(1); peptide_identifications.resize(1); ProtXMLFile().load(in, protein_identifications[0], peptide_identifications[0]); } else if (in_type == FileTypes::OMSSAXML) { protein_identifications.resize(1); OMSSAXMLFile().load(in, protein_identifications[0], peptide_identifications, true); } else if (in_type == FileTypes::MASCOTXML) { String scan_regex = getStringOption_("scan_regex"); String exp_name = getStringOption_("mz_file"); MascotXMLFile::RTMapping rt_mapping; if (!exp_name.empty()) { PeakMap exp; // load only MS2 spectra: fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping); } protein_identifications.resize(1); MascotXMLFile().load(in, protein_identifications[0], peptide_identifications, rt_mapping, scan_regex); } else if (in_type == FileTypes::XML) { ProteinIdentification protein_id; XTandemXMLFile().load(in, protein_id, peptide_identifications); protein_id.setSearchEngineVersion(""); protein_id.setSearchEngine("XTandem"); protein_identifications.push_back(protein_id); String exp_name = getStringOption_("mz_file"); if (!exp_name.empty()) { PeakMap exp; fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it) { UInt id = (Int)it->getMetaValue("spectrum_id"); --id; // native IDs were written 1-based if (id < exp.size()) { it->setRT(exp[id].getRT()); double pre_mz(0.0); if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ(); it->setMZ(pre_mz); it->removeMetaValue("spectrum_id"); } else { LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" 
<< endl; } } } } else { writeLog_("Unknown input file type given. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } } logger.endProgress(); //------------------------------------------------------------- // writing output //------------------------------------------------------------- const String out = getStringOption_("out"); FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type")); if (out_type == FileTypes::UNKNOWN) { out_type = fh.getTypeByFileName(out); } if (out_type == FileTypes::UNKNOWN) { writeLog_("Error: Could not determine output file type!"); return PARSE_ERROR; } logger.startProgress(0, 1, "Storing..."); if (out_type == FileTypes::PEPXML) { bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed"); String mz_file = getStringOption_("mz_file"); String mz_name = getStringOption_("mz_name"); PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed); } else if (out_type == FileTypes::IDXML) { IdXMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::MZIDENTML) { MzIdentMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::FASTA) { Size count = 0; ofstream fasta(out.c_str(), ios::out); for (Size i = 0; i < peptide_identifications.size(); ++i) { for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l) { const PeptideHit& hit = peptide_identifications[i].getHits()[l]; fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++ << "|" << hit.getSequence().toString() << endl; String seq = hit.getSequence().toUnmodifiedString(); // FASTA files should have at most 60 characters of sequence info per line for (Size j = 0; j < seq.size(); j += 60) { Size k = min(j + 60, seq.size()); fasta << string(seq[j], seq[k]) << endl; } } } } else { writeLog_("Unsupported output file type given. 
Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } logger.endProgress(); return EXECUTION_OK; }
ExitCodes main_(int, const char**) { vector<ProteinIdentification> prot_ids; vector<PeptideIdentification> pep_ids; ProteinHit temp_protein_hit; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String inputfile_id = getStringOption_("id"); String inputfile_feature = getStringOption_("feature"); String inputfile_consensus = getStringOption_("consensus"); String inputfile_raw = getStringOption_("in"); String outputfile_name = getStringOption_("out"); //~ bool Ms1(getFlag_("MS1")); //~ bool Ms2(getFlag_("MS2")); bool remove_duplicate_features(getFlag_("remove_duplicate_features")); //------------------------------------------------------------- // fetch vocabularies //------------------------------------------------------------ ControlledVocabulary cv; cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo")); cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo")); QcMLFile qcmlfile; //------------------------------------------------------------- // MS aqiusition //------------------------------------------------------------ String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName(); cout << "Reading mzML file..." 
<< endl; MzMLFile mz_data_file; MSExperiment<Peak1D> exp; MzMLFile().load(inputfile_raw, exp); //---prep input exp.sortSpectra(); UInt min_mz = std::numeric_limits<UInt>::max(); UInt max_mz = 0; std::map<Size, UInt> mslevelcounts; qcmlfile.registerRun(base_name,base_name); //TODO use UIDs //---base MS aquisition qp String msaq_ref = base_name + "_msaq"; QcMLFile::QualityParameter qp; qp.id = msaq_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000004"; try { //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details"); //~ cout << test.name << test.id << endl; const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004"); qp.name = term.name; ///< Name } catch (...) { qp.name = "mzML file"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //---file origin qp qp = QcMLFile::QualityParameter(); qp.name = "mzML file"; ///< Name qp.id = base_name + "_run_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000577"; qp.value = base_name; qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "instrument model"; ///< Name qp.id = base_name + "_instrument_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000031"; qp.value = exp.getInstrument().getName(); qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "completion time"; ///< Name qp.id = base_name + "_date"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000747"; qp.value = exp.getDateTime().getDate(); qcmlfile.addRunQualityParameter(base_name, qp); //---precursors at QcMLFile::Attachment at; at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000044"; at.qualityRef = msaq_ref; at.id = base_name + "_precursors"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch 
(...) { at.name = "precursors"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); //RT at.colTypes.push_back("MS:1000040"); //MZ for (Size i = 0; i < exp.size(); ++i) { mslevelcounts[exp[i].getMSLevel()]++; if (exp[i].getMSLevel() == 2) { if (exp[i].getPrecursors().front().getMZ() < min_mz) { min_mz = exp[i].getPrecursors().front().getMZ(); } if (exp[i].getPrecursors().front().getMZ() > max_mz) { max_mz = exp[i].getPrecursors().front().getMZ(); } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(exp[i].getPrecursors().front().getMZ()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); //---aquisition results qp qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms1aquisition"; ///< Identifier qp.value = String(mslevelcounts[1]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms1 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms2aquisition"; ///< Identifier qp.value = String(mslevelcounts[2]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms2 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results" qp.id = base_name + "_Chromaquisition"; ///< Identifier qp.value = String(exp.getChromatograms().size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of chromatograms"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000009"; at.qualityRef = msaq_ref; at.id = base_name + "_mzrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS MZ aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000010"); //MZ at.colTypes.push_back("QC:0000011"); //MZ std::vector<String> rowmz; rowmz.push_back(String(min_mz)); rowmz.push_back(String(max_mz)); at.tableRows.push_back(rowmz); qcmlfile.addRunAttachment(base_name, at); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000012"; at.qualityRef = msaq_ref; at.id = base_name + "_rtrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS RT aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000013"); //MZ at.colTypes.push_back("QC:0000014"); //MZ std::vector<String> rowrt; rowrt.push_back(String(exp.begin()->getRT())); rowrt.push_back(String(exp.getSpectra().back().getRT())); at.tableRows.push_back(rowrt); qcmlfile.addRunAttachment(base_name, at); //---ion current stability ( & tic ) qp at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000022"; at.qualityRef = msaq_ref; at.id = base_name + "_tics"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "MS TICs"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); at.colTypes.push_back("MS:1000285"); UInt max = 0; Size below_10k = 0; for (Size i = 0; i < exp.size(); ++i) { if (exp[i].getMSLevel() == 1) { UInt sum = 0; for (Size j = 0; j < exp[i].size(); ++j) { sum += exp[i][j].getIntensity(); } if (sum > max) { max = sum; } if (sum < 10000) { ++below_10k; } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(sum); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.id = base_name + "_ticslump"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000023"; qp.value = String((100 / exp.size()) * below_10k); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "percentage of tic slumps"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //------------------------------------------------------------- // MS id //------------------------------------------------------------ if (inputfile_id != "") { IdXMLFile().load(inputfile_id, prot_ids, pep_ids); cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl; ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters(); vector<String> var_mods = params.variable_modifications; //~ boost::regex re("(?<=[KR])(?=[^P])"); String msid_ref = base_name + "_msid"; QcMLFile::QualityParameter qp; qp.id = msid_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000025"; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "MS identification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000026"; at.qualityRef = msid_ref; at.id = base_name + "_idsetting"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS id settings"; ///< Name } at.colTypes.push_back("MS:1001013"); //MS:1001013 db name MS:1001016 version MS:1001020 taxonomy at.colTypes.push_back("MS:1001016"); at.colTypes.push_back("MS:1001020"); std::vector<String> row; row.push_back(String(prot_ids.front().getSearchParameters().db)); row.push_back(String(prot_ids.front().getSearchParameters().db_version)); row.push_back(String(prot_ids.front().getSearchParameters().taxonomy)); at.tableRows.push_back(row); qcmlfile.addRunAttachment(base_name, at); UInt spectrum_count = 0; Size peptide_hit_count = 0; UInt runs_count = 0; Size protein_hit_count = 0; set<String> peptides; set<String> proteins; Size missedcleavages = 0; for (Size i = 0; i < pep_ids.size(); ++i) { if (!pep_ids[i].empty()) { ++spectrum_count; peptide_hit_count += pep_ids[i].getHits().size(); const vector<PeptideHit>& temp_hits = pep_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { peptides.insert(temp_hits[j].getSequence().toString()); } } } for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it) { for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st) { if (*st == 'K' || *st == 'R') { ++missedcleavages; } } } for (Size i = 0; i < prot_ids.size(); ++i) { ++runs_count; protein_hit_count += prot_ids[i].getHits().size(); const vector<ProteinHit>& temp_hits = prot_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { proteins.insert(temp_hits[j].getAccession()); } } qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000037"; ///< cv accession qp.id = base_name 
+ "_misscleave"; ///< Identifier qp.value = missedcleavages; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of missed cleavages"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000032"; ///< cv accession qp.id = base_name + "_totprot"; ///< Identifier qp.value = protein_hit_count; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000033"; ///< cv accession qp.id = base_name + "_totuniqprot"; ///< Identifier qp.value = String(proteins.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000029"; ///< cv accession qp.id = base_name + "_psms"; ///< Identifier qp.value = String(spectrum_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of PSM"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000030"; ///< cv accession qp.id = base_name + "_totpeps"; ///< Identifier qp.value = String(peptide_hit_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "total number of identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000031"; ///< cv accession qp.id = base_name + "_totuniqpeps"; ///< Identifier qp.value = String(peptides.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000038"; at.qualityRef = msid_ref; at.id = base_name + "_massacc"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "delta ppm tables"; } //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M) at.colTypes.push_back("RT"); at.colTypes.push_back("MZ"); at.colTypes.push_back("Score"); at.colTypes.push_back("PeptideSequence"); at.colTypes.push_back("Charge"); at.colTypes.push_back("TheoreticalWeight"); at.colTypes.push_back("delta_ppm"); for (UInt w = 0; w < var_mods.size(); ++w) { at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_')); } std::vector<double> deltas; //~ prot_ids[0].getSearchParameters(); for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it) { if (it->getHits().size() > 0) { std::vector<String> row; row.push_back(it->getRT()); row.push_back(it->getMZ()); PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort vector<UInt> pep_mods; for (UInt w = 0; w < var_mods.size(); ++w) { pep_mods.push_back(0); } for (AASequence::ConstIterator z = tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z) { Residue res = *z; String temp; if 
(res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl") { temp = res.getModification() + " (" + res.getOneLetterCode() + ")"; //cout<<res.getModification()<<endl; for (UInt w = 0; w < var_mods.size(); ++w) { if (temp == var_mods[w]) { //cout<<temp; pep_mods[w] += 1; } } } } row.push_back(tmp.getScore()); row.push_back(tmp.getSequence().toString().removeWhitespaces()); row.push_back(tmp.getCharge()); row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge())); double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true)); row.push_back(String(dppm)); deltas.push_back(dppm); for (UInt w = 0; w < var_mods.size(); ++w) { row.push_back(pep_mods[w]); } at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000040"; ///< cv accession qp.id = base_name + "_mean_delta"; ///< Identifier qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end())); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "mean delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000041"; ///< cv accession qp.id = base_name + "_median_delta"; ///< Identifier qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false)); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "median delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000035"; ///< cv accession qp.id = base_name + "_ratio_id"; ///< Identifier qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2])); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "id ratio"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } //------------------------------------------------------------- // MS quantitation //------------------------------------------------------------ FeatureMap map; String msqu_ref = base_name + "_msqu"; if (inputfile_feature != "") { FeatureXMLFile f; f.load(inputfile_feature, map); cout << "Read featureXML file..." << endl; //~ UInt fiter = 0; map.sortByRT(); map.updateRanges(); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000045"; ///< cv accession qp.id = msqu_ref; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "MS quantification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000046"; ///< cv accession qp.id = base_name + "_feature_count"; ///< Identifier qp.value = String(map.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of features"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } if (inputfile_feature != "" && !remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); at.colTypes.push_back("Quality"); at.colTypes.push_back("FWHM"); at.colTypes.push_back("IDs"); UInt fiter = 0; map.sortByRT(); //ofstream out(outputfile_name.c_str()); while (fiter < map.size()) { std::vector<String> row; row.push_back(map[fiter].getMZ()); row.push_back(map[fiter].getRT()); row.push_back(map[fiter].getIntensity()); row.push_back(map[fiter].getCharge()); row.push_back(map[fiter].getOverallQuality()); row.push_back(map[fiter].getWidth()); row.push_back(map[fiter].getPeptideIdentifications().size()); fiter++; at.tableRows.push_back(row); } qcmlfile.addRunAttachment(base_name, at); } else if (inputfile_feature != "" && remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); FeatureMap map, map_out; FeatureXMLFile f; f.load(inputfile_feature, map); UInt fiter = 0; map.sortByRT(); while (fiter < map.size()) { FeatureMap map_tmp; for (UInt k = fiter; k <= map.size(); ++k) { if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1) { //~ cout << fiter << endl; map_tmp.push_back(map[k]); } else { fiter = k; break; } } map_tmp.sortByMZ(); UInt retif = 1; map_out.push_back(map_tmp[0]); while (retif < map_tmp.size()) { if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01) { cout << "equal RT, but mass different" << endl; map_out.push_back(map_tmp[retif]); } retif++; } } qcmlfile.addRunAttachment(base_name, at); } if (inputfile_consensus != "") { cout << "Reading consensusXML file..." << endl; ConsensusXMLFile f; ConsensusMap map; f.load(inputfile_consensus, map); //~ String CONSENSUS_NAME = "_consensus.tsv"; //~ String combined_out = outputfile_name + CONSENSUS_NAME; //~ ofstream out(combined_out.c_str()); at = QcMLFile::Attachment(); qp.name = "consensuspoints"; ///< Name //~ qp.id = base_name + "_consensuses"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results" at.colTypes.push_back("Native_spectrum_ID"); at.colTypes.push_back("DECON_RT_(sec)"); at.colTypes.push_back("DECON_MZ_(Th)"); at.colTypes.push_back("DECON_Intensity"); at.colTypes.push_back("Feature_RT_(sec)"); at.colTypes.push_back("Feature_MZ_(Th)"); at.colTypes.push_back("Feature_Intensity"); at.colTypes.push_back("Feature_Charge"); for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit) { const ConsensusFeature& CF = *cmit; for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit) { std::vector<String> row; FeatureHandle FH = *cfit; row.push_back(CF.getMetaValue("spectrum_native_id")); 
row.push_back(CF.getRT()); row.push_back(CF.getMZ()); row.push_back(CF.getIntensity()); row.push_back(FH.getRT()); row.push_back(FH.getMZ()); row.push_back(FH.getCharge()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); } //------------------------------------------------------------- // finalize //------------------------------------------------------------ qcmlfile.store(outputfile_name); return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- // file list StringList file_list = getStringList_("in"); // file type FileHandler file_handler; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = file_handler.getType(file_list[0]); } // output file names and types String out_file = getStringOption_("out"); bool annotate_file_origin = getFlag_("annotate_file_origin"); rt_gap_ = getDoubleOption_("rt_concat:gap"); vector<String> trafo_out = getStringList_("rt_concat:trafo_out"); if (trafo_out.empty()) { // resize now so we don't have to worry about indexing out of bounds: trafo_out.resize(file_list.size()); } else if (trafo_out.size() != file_list.size()) { writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // calculations //------------------------------------------------------------- if (force_type == FileTypes::FEATUREXML) { FeatureMap out; FeatureXMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == 
FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); // skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; TraMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); fh.store(out_file, out); } else // raw data input (e.g. 
mzML) { // RT bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (!custom_rts.empty()) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!"); return ILLEGAL_PARAMETERS; } } // MS level Int ms_level = getIntOption_("raw:ms_level"); MSExperiment<> out; UInt rt_auto = 0; UInt native_id = 0; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; // load file force_type = file_handler.getType(file_list[i]); MSExperiment<> in; file_handler.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); // warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More than one scan in file '") + filename + "'! 
All scans will have the same retention time!"); } // handle special raw data options: for (MSExperiment<>::iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { float rt_final = spec_it->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { static const boost::regex re("rt(\\d+(\\.\\d+)?)"); boost::smatch match; bool found = boost::regex_search(filename, match, re); if (found) { rt_final = String(match[1]).toFloat(); } else { writeLog_("Warning: could not extract retention time from filename '" + filename + "'"); } } // none of the rt methods were successful if (rt_final < 0) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } spec_it->setRT(rt_final); spec_it->setNativeID("spectrum=" + String(native_id)); if (ms_level > 0) { spec_it->setMSLevel(ms_level); } ++native_id; } // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { in[0].setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways) } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(in, trafo_out[i], i == 0); } // add spectra to output for (MSExperiment<>::const_iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { out.addSpectrum(*spec_it); } // also add the chromatograms for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator chrom_it = in.getChromatograms().begin(); chrom_it != in.getChromatograms().end(); ++chrom_it) { out.addChromatogram(*chrom_it); } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), 
in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //file list StringList file_list = getStringList_("in"); //file type FileHandler fh; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = fh.getType(file_list[0]); } //output file names and types String out_file = getStringOption_("out"); //------------------------------------------------------------- // calculations //------------------------------------------------------------- bool annotate_file_origin = getFlag_("annotate_file_origin"); if (force_type == FileTypes::FEATUREXML) { FeatureMap<> out; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap<> map; FeatureXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); FeatureXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); //skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; ConsensusXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data 
processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); ConsensusXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; TraMLFile fh; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); TraMLFile f; f.store(out_file, out); } else { // we might want to combine different types, thus we only // query in_type (which applies to all files) // and not the suffix or content of a single file force_type = FileTypes::nameToType(getStringOption_("in_type")); //rt bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (custom_rts.size() != 0) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list must have as many elements as there are input files!"); printUsage_(); return ILLEGAL_PARAMETERS; } } //ms level bool user_ms_level = getFlag_("raw:user_ms_level"); MSExperiment<> out; out.reserve(file_list.size()); UInt rt_auto = 0; UInt native_id = 0; std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; //load file MSExperiment<> in; fh.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); //warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More 
than one scan in file '") + filename + "'! All scans will have the same retention time!"); } for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2) { //handle rt Real rt_final = it2->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { if (!filename.hasSubstring("rt")) { writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'")); } for (Size i = 0; i < filename.size(); ++i) { if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i])) { String rt; while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i]))) { rt += filename[i++]; } if (rt.size() > 0) { // remove dot from rt3892.98.dta // ^ if (rt[rt.size() - 1] == '.') { // remove last character rt.erase(rt.end() - 1); } } try { float tmp = rt.toFloat(); rt_final = tmp; } catch (Exception::ConversionError) { writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'.")); } } } } // none of the rt methods were successful if (rt_final == -1) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } out.addSpectrum(*it2); out.getSpectra().back().setRT(rt_final); out.getSpectra().back().setNativeID(native_id); if (user_ms_level) { out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level")); } ++native_id; } // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (its in the spectrum anyways) } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // 
otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } // also add the chromatograms for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2) { all_chromatograms.push_back(*it2); } } // set the chromatograms out.setChromatograms(all_chromatograms); //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()),"dta2d"); TEST_EQUAL(e.size(), 9); ABORT_IF(e.size() != 9) TEST_STRING_EQUAL(e[0].getNativeID(),"index=0") TEST_STRING_EQUAL(e[1].getNativeID(),"index=1") TEST_STRING_EQUAL(e[2].getNativeID(),"index=2") TEST_STRING_EQUAL(e[3].getNativeID(),"index=3") TEST_STRING_EQUAL(e[4].getNativeID(),"index=4") TEST_STRING_EQUAL(e[5].getNativeID(),"index=5") TEST_STRING_EQUAL(e[6].getNativeID(),"index=6") TEST_STRING_EQUAL(e[7].getNativeID(),"index=7") TEST_STRING_EQUAL(e[8].getNativeID(),"index=8") MSExperiment<>::const_iterator it(e.begin()); TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 230.02) TEST_REAL_SIMILAR(it->getRT(), 4711.1) TEST_REAL_SIMILAR((*it)[0].getIntensity(), 47218.89) ++it; TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 231.51) TEST_REAL_SIMILAR(it->getRT(), 4711.2) TEST_REAL_SIMILAR((*it)[0].getIntensity(), 89935.22) ++it; TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 139.42) TEST_REAL_SIMILAR(it->getRT(), 4711.3) TEST_REAL_SIMILAR((*it)[0].getIntensity(), 318.52) ++it;