boost::shared_ptr<IsobaricQuantitationMethod> IBSpectraFile::guessExperimentType_(const ConsensusMap& cm)
{
  if (cm.getExperimentType() != "labeled_MS2" && cm.getExperimentType() != "itraq")
  {
    throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                      "Given ConsensusMap does not hold any isobaric quantification data.");
  }

  // we take the map count as an approximation of the plex size
  if (cm.getFileDescriptions().size() == 4)
  {
    return boost::shared_ptr<IsobaricQuantitationMethod>(new ItraqFourPlexQuantitationMethod);
  }
  else if (cm.getFileDescriptions().size() == 6)
  {
    return boost::shared_ptr<IsobaricQuantitationMethod>(new TMTSixPlexQuantitationMethod);
  }
  else if (cm.getFileDescriptions().size() == 8)
  {
    return boost::shared_ptr<IsobaricQuantitationMethod>(new ItraqEightPlexQuantitationMethod);
  }
  else
  {
    throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                      "Could not guess isobaric quantification data from ConsensusMap due to non-matching number of input maps.");
  }
}
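// Illustrative sketch (not part of the class): the dispatch above keys purely on
// the number of registered file descriptions. The setup below is hypothetical
// test data; with 6 channels, guessExperimentType_() would pick the TMT 6-plex
// branch (4 -> iTRAQ 4-plex, 8 -> iTRAQ 8-plex, anything else throws).
void sketchGuessExperimentTypeInput()
{
  ConsensusMap cm;
  cm.setExperimentType("labeled_MS2");
  for (Size i = 0; i < 6; ++i)
  {
    cm.getFileDescriptions()[i].label = String("TMT_channel_") + String(i);
  }
  // cm now matches the TMT 6-plex branch of guessExperimentType_().
}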
void SeedListGenerator::generateSeedLists(const ConsensusMap& consensus,
                                          Map<UInt64, SeedList>& seed_lists)
{
  seed_lists.clear();
  // iterate over all consensus features...
  for (ConsensusMap::ConstIterator cons_it = consensus.begin();
       cons_it != consensus.end(); ++cons_it)
  {
    DPosition<2> point(cons_it->getRT(), cons_it->getMZ());
    // for each sub-map in the consensus map, add a seed at the position of
    // this consensus feature:
    for (ConsensusMap::FileDescriptions::const_iterator file_it =
           consensus.getFileDescriptions().begin();
         file_it != consensus.getFileDescriptions().end(); ++file_it)
    {
      seed_lists[file_it->first].push_back(point);
    }
    // for each feature contained in the consensus feature, remove the seed of
    // the corresponding map:
    for (ConsensusFeature::HandleSetType::const_iterator feat_it =
           cons_it->getFeatures().begin();
         feat_it != cons_it->getFeatures().end(); ++feat_it)
    {
      seed_lists[feat_it->getMapIndex()].pop_back();
    }
    // this leaves seeds for maps where no feature was found near the
    // consensus position
  }
}
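// Minimal usage sketch (all data made up; SeedList is referred to via the class
// scope, as suggested by the signature above): one consensus feature carrying a
// sub-feature from map 0 only, so after the push/pop logic a seed remains only
// for map 1.
void sketchGenerateSeedLists()
{
  ConsensusMap consensus;
  consensus.getFileDescriptions()[0].filename = "map0.featureXML";
  consensus.getFileDescriptions()[1].filename = "map1.featureXML";

  Feature f;
  f.setUniqueId();
  f.setRT(100.0);
  f.setMZ(500.0);

  ConsensusFeature cf;
  cf.setRT(100.0);
  cf.setMZ(500.0);
  cf.insert(0, f); // found in map 0, missing in map 1
  consensus.push_back(cf);

  SeedListGenerator generator;
  Map<UInt64, SeedListGenerator::SeedList> seed_lists;
  generator.generateSeedLists(consensus, seed_lists);
  // seed_lists[0] is now empty; seed_lists[1] holds one seed at (RT 100, m/z 500).
}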
void FeatureGroupingAlgorithm::transferSubelements(const vector<ConsensusMap>& maps,
                                                   ConsensusMap& out) const
{
  // accumulate file descriptions from the input maps:
  // cout << "Updating file descriptions..." << endl;
  out.getFileDescriptions().clear();
  // mapping: (map index, original id) -> new id
  map<pair<Size, UInt64>, Size> mapid_table;
  for (Size i = 0; i < maps.size(); ++i)
  {
    const ConsensusMap& consensus = maps[i];
    for (ConsensusMap::FileDescriptions::const_iterator desc_it = consensus.getFileDescriptions().begin();
         desc_it != consensus.getFileDescriptions().end(); ++desc_it)
    {
      Size counter = mapid_table.size();
      mapid_table[make_pair(i, desc_it->first)] = counter;
      out.getFileDescriptions()[counter] = desc_it->second;
    }
  }

  // look-up table: input map -> unique ID -> consensus feature
  // cout << "Creating look-up table..." << endl;
  vector<map<UInt64, ConsensusMap::ConstIterator> > feat_lookup(maps.size());
  for (Size i = 0; i < maps.size(); ++i)
  {
    const ConsensusMap& consensus = maps[i];
    for (ConsensusMap::ConstIterator feat_it = consensus.begin();
         feat_it != consensus.end(); ++feat_it)
    {
      // do NOT use "id_lookup[i][feat_it->getUniqueId()] = feat_it;" here as
      // you will get "attempt to copy-construct an iterator from a singular
      // iterator" in STL debug mode:
      feat_lookup[i].insert(make_pair(feat_it->getUniqueId(), feat_it));
    }
  }

  // adjust the consensus features:
  // cout << "Adjusting consensus features..." << endl;
  for (ConsensusMap::iterator cons_it = out.begin(); cons_it != out.end(); ++cons_it)
  {
    // start from a copy without sub-features (slicing to BaseFeature drops them)
    ConsensusFeature adjusted = ConsensusFeature(static_cast<BaseFeature>(*cons_it));
    // replace each sub-feature (which refers to an input consensus feature) by
    // the sub-features of that original consensus feature, with re-mapped indices:
    for (ConsensusFeature::HandleSetType::const_iterator sub_it = cons_it->getFeatures().begin();
         sub_it != cons_it->getFeatures().end(); ++sub_it)
    {
      UInt64 id = sub_it->getUniqueId();
      Size map_index = sub_it->getMapIndex();
      ConsensusMap::ConstIterator origin = feat_lookup[map_index][id];
      for (ConsensusFeature::HandleSetType::const_iterator handle_it = origin->getFeatures().begin();
           handle_it != origin->getFeatures().end(); ++handle_it)
      {
        FeatureHandle handle = *handle_it;
        Size new_id = mapid_table[make_pair(map_index, handle.getMapIndex())];
        handle.setMapIndex(new_id);
        adjusted.insert(handle);
      }
    }
    *cons_it = adjusted;
  }
}
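// Worked example (hypothetical numbers) of the re-indexing table built above:
// with maps[0] holding file descriptions {0, 1} and maps[1] holding {0},
// mapid_table becomes
//   (0, 0) -> 0
//   (0, 1) -> 1
//   (1, 0) -> 2
// so every FeatureHandle's map index is rewritten into one flat, gap-free
// numbering in the output map.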
void FeatureGroupingAlgorithmLabeled::group(const std::vector<FeatureMap<> >& maps, ConsensusMap& out)
{
  // check that the number of maps is ok
  if (maps.size() != 1)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Exactly one map must be given!");
  if (out.getFileDescriptions().size() != 2)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Two file descriptions must be set in 'out'!");

  // initialize LabeledPairFinder
  LabeledPairFinder pm;
  pm.setParameters(param_.copy("", true));

  // convert to consensus map
  std::vector<ConsensusMap> input(1);
  ConsensusMap::convert(0, maps[0], input[0]);

  // run
  pm.run(input, out);
}
void IsobaricChannelExtractor::registerChannelsInOutputMap_(ConsensusMap& consensus_map)
{
  // register the individual channels in the output consensus map
  Int index = 0;
  for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin();
       cl_it != quant_method_->getChannelInformation().end(); ++cl_it)
  {
    ConsensusMap::FileDescription channel_as_map;
    // label is the channel + description provided in the Params
    channel_as_map.label = quant_method_->getName() + "_" + cl_it->name;
    // TODO(aiche): the number of features needs to be set later
    channel_as_map.size = consensus_map.size();
    // add some more MetaInfo
    channel_as_map.setMetaValue("channel_name", cl_it->name);
    channel_as_map.setMetaValue("channel_id", cl_it->id);
    channel_as_map.setMetaValue("channel_description", cl_it->description);
    channel_as_map.setMetaValue("channel_center", cl_it->center);
    consensus_map.getFileDescriptions()[index++] = channel_as_map;
  }
}
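// Sketch: reading the per-channel meta data registered above back out of the
// consensus map (e.g. for logging). Uses only accessors that appear elsewhere
// in this section; the map is assumed to have been filled already.
void sketchDumpChannelInfo(const ConsensusMap& consensus_map)
{
  for (ConsensusMap::FileDescriptions::const_iterator it = consensus_map.getFileDescriptions().begin();
       it != consensus_map.getFileDescriptions().end(); ++it)
  {
    String name = it->second.getMetaValue("channel_name");
    String center = it->second.getMetaValue("channel_center");
    std::cout << "map index " << it->first << ": label=" << it->second.label
              << " name=" << name << " center=" << center << "\n";
  }
}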
void IsobaricQuantifier::computeLabelingStatistics_(ConsensusMap& consensus_map_out)
{
  // number of total quantified spectra
  stats_.number_ms2_total = consensus_map_out.size();

  // labeling efficiency statistics
  for (size_t i = 0; i < consensus_map_out.size(); ++i)
  {
    // is the whole scan empty?
    if (consensus_map_out[i].getIntensity() == 0) ++stats_.number_ms2_empty;

    // look at the single reporters
    for (ConsensusFeature::HandleSetType::const_iterator it_elements = consensus_map_out[i].begin();
         it_elements != consensus_map_out[i].end(); ++it_elements)
    {
      if (it_elements->getIntensity() == 0)
      {
        String ch_index = consensus_map_out.getFileDescriptions()[it_elements->getMapIndex()].getMetaValue("channel_name");
        ++stats_.empty_channels[ch_index];
      }
    }
  }

  LOG_INFO << "IsobaricQuantifier: skipped " << stats_.number_ms2_empty << " of " << consensus_map_out.size()
           << " selected scans due to lack of reporter information:\n";
  consensus_map_out.setMetaValue("isoquant:scans_noquant", stats_.number_ms2_empty);
  consensus_map_out.setMetaValue("isoquant:scans_total", consensus_map_out.size());

  LOG_INFO << "IsobaricQuantifier: channels with signal\n";
  for (std::map<String, Size>::const_iterator it_m = stats_.empty_channels.begin();
       it_m != stats_.empty_channels.end(); ++it_m)
  {
    LOG_INFO << " channel " << it_m->first << ": "
             << (consensus_map_out.size() - it_m->second) << " / " << consensus_map_out.size()
             << " (" << ((consensus_map_out.size() - it_m->second) * 100 / consensus_map_out.size()) << "%)\n";
    consensus_map_out.setMetaValue(String("isoquant:quantifyable_ch") + it_m->first,
                                   (consensus_map_out.size() - it_m->second));
  }
}
ExitCodes main_(int, const char**)
{
  // data to be passed through the algorithm
  vector<vector<SILACPattern> > data;
  MSQuantifications msq;
  vector<Clustering*> cluster_data;

  //
  // parameter handling
  //
  map<String, DoubleReal> label_identifiers; // list defining the mass shifts of each label (e.g. "Arg6" => 6.0201290268)
  handleParameters_sample();
  handleParameters_algorithm();
  handleParameters_labels(label_identifiers);
  handleParameters();

  if (selected_labels.empty() && !out.empty()) // incompatible parameters
  {
    writeLog_("Error: The 'out' parameter cannot be used without a label (parameter 'sample:labels'). Use 'out_features' instead.");
    return ILLEGAL_PARAMETERS;
  }

  //
  // initializing the SILACAnalyzer with our parameters
  //
  SILACAnalyzer analyzer;
  analyzer.setLogType(log_type_);
  analyzer.initialize(
    // section "sample"
    selected_labels,
    charge_min,
    charge_max,
    missed_cleavages,
    isotopes_per_peptide_min,
    isotopes_per_peptide_max,
    // section "algorithm"
    rt_threshold,
    rt_min,
    intensity_cutoff,
    intensity_correlation,
    model_deviation,
    allow_missing_peaks,
    // labels
    label_identifiers);

  //--------------------------------------------------
  // loading input from .mzML
  //--------------------------------------------------
  MzMLFile file;
  MSExperiment<Peak1D> exp;

  // only read MS1 spectra ...
  /*
  std::vector<int> levels;
  levels.push_back(1);
  file.getOptions().setMSLevels(levels);
  */
  LOG_DEBUG << "Loading input..." << endl;
  file.setLogType(log_type_);
  file.load(in, exp);

  // set size of input map
  exp.updateRanges();

  // extract level 1 spectra
  exp.getSpectra().erase(remove_if(exp.begin(), exp.end(),
                                   InMSLevelRange<MSExperiment<Peak1D>::SpectrumType>(IntList::create("1"), true)),
                         exp.end());

  // sort according to RT and m/z
  exp.sortSpectra();

  if (out_mzq != "")
  {
    vector<vector<String> > SILAClabels = analyzer.getSILAClabels(); // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"
    std::vector<std::vector<std::pair<String, DoubleReal> > > labels;
    // add the 'none' label
    labels.push_back(std::vector<std::pair<String, DoubleReal> >(1, std::make_pair<String, DoubleReal>(String("none"), DoubleReal(0))));
    for (Size i = 0; i < SILAClabels.size(); ++i) // SILAC labels MUST be in weight order!
    {
      std::vector<std::pair<String, DoubleReal> > one_label;
      for (UInt j = 0; j < SILAClabels[i].size(); ++j)
      {
        one_label.push_back(*(label_identifiers.find(SILAClabels[i][j]))); // this dereferencing would break if the SILAC labels had not all been checked before!
      }
      labels.push_back(one_label);
    }
    msq.registerExperiment(exp, labels); // add assays
    msq.assignUIDs();
  }
  MSQuantifications::QUANT_TYPES quant_type = MSQuantifications::MS1LABEL;
  msq.setAnalysisSummaryQuantType(quant_type); // add analysis_summary_

  //--------------------------------------------------
  // estimate peak width
  //--------------------------------------------------
  LOG_DEBUG << "Estimating peak width..." << endl;
  PeakWidthEstimator::Result peak_width;
  try
  {
    peak_width = analyzer.estimatePeakWidth(exp);
  }
  catch (Exception::InvalidSize&)
  {
    writeLog_("Error: Unable to estimate peak width of input data.");
    return INCOMPATIBLE_INPUT_DATA;
  }

  if (in_filters == "")
  {
    //--------------------------------------------------
    // filter input data
    //--------------------------------------------------
    LOG_DEBUG << "Filtering input data..." << endl;
    analyzer.filterData(exp, peak_width, data);

    //--------------------------------------------------
    // store filter results
    //--------------------------------------------------
    if (out_filters != "")
    {
      LOG_DEBUG << "Storing filtering results..." << endl;
      ConsensusMap map;
      for (std::vector<std::vector<SILACPattern> >::const_iterator it = data.begin(); it != data.end(); ++it)
      {
        analyzer.generateFilterConsensusByPattern(map, *it);
      }
      analyzer.writeConsensus(out_filters, map);
    }
  }
  else
  {
    //--------------------------------------------------
    // load filter results
    //--------------------------------------------------
    LOG_DEBUG << "Loading filtering results..." << endl;
    ConsensusMap map;
    analyzer.readConsensus(in_filters, map);
    analyzer.readFilterConsensusByPattern(map, data);
  }

  //--------------------------------------------------
  // clustering
  //--------------------------------------------------
  LOG_DEBUG << "Clustering data..." << endl;
  analyzer.clusterData(exp, peak_width, cluster_data, data);

  //--------------------------------------------------------------
  // write output
  //--------------------------------------------------------------
  if (out_debug != "")
  {
    LOG_DEBUG << "Writing debug output file..." << endl;
    std::ofstream out((out_debug + ".clusters.csv").c_str());

    vector<vector<DoubleReal> > massShifts = analyzer.getMassShifts(); // list of mass shifts

    // generate header
    out << std::fixed << std::setprecision(8) << "ID,RT,MZ_PEAK,CHARGE";
    for (UInt i = 1; i <= massShifts[0].size(); ++i)
    {
      out << ",DELTA_MASS_" << i + 1;
    }
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",INT_PEAK_" << i + 1 << '_' << j;
      }
    }
    out << ",MZ_RAW";
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",INT_RAW_" << i + 1 << '_' << j;
      }
    }
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",MZ_RAW_" << i + 1 << '_' << j;
      }
    }
    out << '\n';

    // write data
    UInt cluster_id = 0;
    for (vector<Clustering*>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterDebug(out, **it, cluster_id);
    }
  }

  if (out != "")
  {
    LOG_DEBUG << "Generating output consensus map..." << endl;
    ConsensusMap map;
    for (vector<Clustering*>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterConsensusByCluster(map, **it);
    }

    LOG_DEBUG << "Adding meta data..." << endl;
    // XXX: Need a map per mass shift
    ConsensusMap::FileDescriptions& desc = map.getFileDescriptions();
    Size id = 0;
    for (ConsensusMap::FileDescriptions::iterator it = desc.begin(); it != desc.end(); ++it)
    {
      if (test_mode_) it->second.filename = in; // skip the path, since it is not cross-platform and complicates verification
      else it->second.filename = File::basename(in);
      // write the correct label
      // (this would crash if used without a label!)
      if (id > 0) it->second.label = StringList(analyzer.getSILAClabels()[id - 1]).concatenate(""); // skip the first round (the empty label is not listed)
      ++id;
    }

    std::set<DataProcessing::ProcessingAction> actions;
    actions.insert(DataProcessing::DATA_PROCESSING);
    actions.insert(DataProcessing::PEAK_PICKING);
    actions.insert(DataProcessing::FILTERING);
    actions.insert(DataProcessing::QUANTITATION);
    addDataProcessing_(map, getProcessingInfo_(actions));

    analyzer.writeConsensus(out, map);

    if (out_mzq != "")
    {
      LOG_DEBUG << "Generating output mzQuantML file..." << endl;
      ConsensusMap numap(map);
      // calculate ratios
      for (ConsensusMap::iterator cit = numap.begin(); cit != numap.end(); ++cit)
      {
        //~ make ratio templates
        std::vector<ConsensusFeature::Ratio> rts;
        for (std::vector<MSQuantifications::Assay>::const_iterator ait = msq.getAssays().begin() + 1; ait != msq.getAssays().end(); ++ait)
        {
          ConsensusFeature::Ratio r;
          r.numerator_ref_ = String(msq.getAssays().begin()->uid_);
          r.denominator_ref_ = String(ait->uid_);
          r.description_.push_back("Simple ratio calc");
          r.description_.push_back("light to medium/.../heavy");
          //~ "<cvParam cvRef=\"PSI-MS\" accession=\"MS:1001132\" name=\"peptide ratio\"/>"
          rts.push_back(r);
        }

        const ConsensusFeature::HandleSetType& feature_handles = cit->getFeatures();
        if (feature_handles.size() > 1)
        {
          std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fit = feature_handles.begin(); // this is the unlabeled one
          ++fit;
          for (; fit != feature_handles.end(); ++fit)
          {
            Size ri = std::distance(feature_handles.begin(), fit);
            rts[ri - 1].ratio_value_ = feature_handles.begin()->getIntensity() / fit->getIntensity(); // a proper SILACAnalyzer run should never produce 0-intensities, so no division by zero ...
          }
        }
        cit->setRatios(rts);
      }
      msq.addConsensusMap(numap); // add the SILACAnalyzer result
      //~ msq.addFeatureMap(); // add the SILACAnalyzer evidence trail as soon as it is clear what is really contained in the feature map
      //~ add AuditCollection - no such concept in TOPP tools yet
      analyzer.writeMzQuantML(out_mzq, msq);
    }
  }

  if (out_clusters != "")
  {
    LOG_DEBUG << "Generating cluster output file..." << endl;
    ConsensusMap map;
    for (vector<Clustering*>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      UInt cluster_id = 0;
      analyzer.generateClusterConsensusByPattern(map, **it, cluster_id);
    }
    ConsensusMap::FileDescription& desc = map.getFileDescriptions()[0];
    desc.filename = in;
    desc.label = "Cluster";
    analyzer.writeConsensus(out_clusters, map);
  }

  if (out_features != "")
  {
    LOG_DEBUG << "Generating output feature map..." << endl;
    FeatureMap<> map;
    for (vector<Clustering*>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterFeatureByCluster(map, **it);
    }
    analyzer.writeFeatures(out_features, map);
  }

  return EXECUTION_OK;
}
vector<double> ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(const ConsensusMap& map,
                                                                            const double& ratio_threshold,
                                                                            const String& acc_filter,
                                                                            const String& desc_filter)
{
  Size number_of_features = map.size();
  Size number_of_maps = map.getFileDescriptions().size();
  vector<vector<double> > feature_int(number_of_maps);

  // get the map with the most features, resize feature_int
  UInt map_with_most_features_idx = 0;
  ConsensusMap::FileDescriptions::const_iterator map_with_most_features = map.getFileDescriptions().find(0);
  for (UInt i = 0; i < number_of_maps; i++)
  {
    feature_int[i].resize(number_of_features);
    ConsensusMap::FileDescriptions::const_iterator it = map.getFileDescriptions().find(i);
    if (it->second.size > map_with_most_features->second.size)
    {
      map_with_most_features = it;
      map_with_most_features_idx = i;
    }
  }

  // fill feature_int with intensities
  Size pass_counter = 0;
  ConsensusMap::ConstIterator cf_it;
  UInt idx = 0;
  for (cf_it = map.begin(); cf_it != map.end(); ++cf_it, ++idx)
  {
    if (!ConsensusMapNormalizerAlgorithmMedian::passesFilters_(cf_it, map, acc_filter, desc_filter))
    {
      continue;
    }
    ++pass_counter;
    ConsensusFeature::HandleSetType::const_iterator f_it;
    for (f_it = cf_it->getFeatures().begin(); f_it != cf_it->getFeatures().end(); ++f_it)
    {
      feature_int[f_it->getMapIndex()][idx] = f_it->getIntensity();
    }
  }

  LOG_INFO << endl << "Using " << pass_counter << "/" << map.size()
           << " consensus features for computing normalization coefficients" << endl << endl;

  // determine the ratios
  vector<double> ratio_vector(number_of_maps);
  for (UInt j = 0; j < number_of_maps; j++)
  {
    vector<double> ratios;
    for (UInt k = 0; k < number_of_features; ++k)
    {
      if (feature_int[map_with_most_features_idx][k] != 0.0 && feature_int[j][k] != 0.0)
      {
        double ratio = feature_int[map_with_most_features_idx][k] / feature_int[j][k];
        if (ratio > ratio_threshold && ratio < 1 / ratio_threshold)
        {
          ratios.push_back(ratio);
        }
      }
    }
    if (ratios.empty())
    {
      LOG_WARN << endl << "Not enough features passing filters. Cannot compute normalization coefficients for all maps. Result will be unnormalized." << endl << endl;
      return vector<double>(number_of_maps, 1.0);
    }
    ratio_vector[j] = Math::mean(ratios.begin(), ratios.end());
  }
  return ratio_vector;
}
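// Hypothetical usage sketch (assumes computeCorrelation is a static member, as
// its definition suggests): compute per-map coefficients relative to the map
// with the most features. A ratio_threshold of 0.67 keeps ratios in
// (0.67, 1/0.67); empty accession/description filters accept everything.
// The all-1.0 fallback vector means "leave the maps unnormalized".
void sketchComputeCorrelation(const ConsensusMap& map)
{
  vector<double> coefficients =
    ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(map, 0.67, "", "");
  for (Size i = 0; i < coefficients.size(); ++i)
  {
    LOG_INFO << "map " << i << ": coefficient " << coefficients[i] << "\n";
  }
}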
ExitCodes main_(int, const char**)
{
  FeatureGroupingAlgorithmUnlabeled* algorithm = new FeatureGroupingAlgorithmUnlabeled();

  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList ins;
  ins = getStringList_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // check for valid input
  //-------------------------------------------------------------
  // check if all input files have the correct type
  FileTypes::Type file_type = FileHandler::getType(ins[0]);
  for (Size i = 0; i < ins.size(); ++i)
  {
    if (FileHandler::getType(ins[i]) != file_type)
    {
      writeLog_("Error: All input files must be of the same type!");
      return ILLEGAL_PARAMETERS;
    }
  }

  //-------------------------------------------------------------
  // set up algorithm
  //-------------------------------------------------------------
  Param algorithm_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used algorithm parameters", algorithm_param, 3);
  algorithm->setParameters(algorithm_param);

  Size reference_index(0);

  //-------------------------------------------------------------
  // perform grouping
  //-------------------------------------------------------------
  // load input
  ConsensusMap out_map;
  StringList ms_run_locations;
  if (file_type == FileTypes::FEATUREXML)
  {
    // use the map with the highest number of features as reference:
    Size max_count(0);
    FeatureXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      Size s = f.loadSize(ins[i]);
      if (s > max_count)
      {
        max_count = s;
        reference_index = i;
      }
    }

    // load the reference map and input it to the algorithm
    UInt64 ref_id;
    Size ref_size;
    std::vector<PeptideIdentification> ref_pepids;
    std::vector<ProteinIdentification> ref_protids;
    {
      FeatureMap map_ref;
      FeatureXMLFile f_fxml_tmp;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[reference_index], map_ref);
      algorithm->setReference(reference_index, map_ref);
      ref_id = map_ref.getUniqueId();
      ref_size = map_ref.size();
      ref_pepids = map_ref.getUnassignedPeptideIdentifications();
      ref_protids = map_ref.getProteinIdentifications();
    }

    ConsensusMap dummy;
    // go through all input files and add them to the result one by one
    for (Size i = 0; i < ins.size(); ++i)
    {
      FeatureXMLFile f_fxml_tmp;
      FeatureMap tmp_map;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[i], tmp_map);

      // copy over information on the primary MS run
      StringList ms_runs;
      tmp_map.getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

      if (i != reference_index)
      {
        algorithm->addToGroup(i, tmp_map);

        // store some meta-data about the maps in the "dummy" object -> try to
        // keep the same order as they were given in the input, independent of
        // which map is the reference.
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = tmp_map.size();
        dummy.getFileDescriptions()[i].unique_id = tmp_map.getUniqueId();

        // add protein identifications to the result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          tmp_map.getProteinIdentifications().begin(),
          tmp_map.getProteinIdentifications().end());

        // add unassigned peptide identifications to the result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          tmp_map.getUnassignedPeptideIdentifications().begin(),
          tmp_map.getUnassignedPeptideIdentifications().end());
      }
      else
      {
        // copy the meta-data from the reference map
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = ref_size;
        dummy.getFileDescriptions()[i].unique_id = ref_id;

        // add protein identifications to the result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          ref_protids.begin(), ref_protids.end());

        // add unassigned peptide identifications to the result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          ref_pepids.begin(), ref_pepids.end());
      }
    }

    // get the resulting map
    out_map = algorithm->getResultMap();

    //
    // copy back meta-data (protein / peptide ids / file descriptions)
    //

    // add protein identifications to the result map
    out_map.getProteinIdentifications().insert(
      out_map.getProteinIdentifications().end(),
      dummy.getProteinIdentifications().begin(),
      dummy.getProteinIdentifications().end());

    // add unassigned peptide identifications to the result map
    out_map.getUnassignedPeptideIdentifications().insert(
      out_map.getUnassignedPeptideIdentifications().end(),
      dummy.getUnassignedPeptideIdentifications().begin(),
      dummy.getUnassignedPeptideIdentifications().end());

    out_map.setFileDescriptions(dummy.getFileDescriptions());

    // canonical ordering for checking the results (the ids have no real meaning anyway);
    // this is the way it was done in DelaunayPairFinder and StablePairFinder
    // -> the same ordering as in FeatureGroupingAlgorithmUnlabeled::group applies!
    out_map.sortByMZ();
    out_map.updateRanges();
  }
  else
  {
    vector<ConsensusMap> maps(ins.size());
    ConsensusXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      f.load(ins[i], maps[i]);
      StringList ms_runs;
      maps[i].getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
    }
    // group
    algorithm->FeatureGroupingAlgorithm::group(maps, out_map);

    // set file descriptions:
    bool keep_subelements = getFlag_("keep_subelements");
    if (!keep_subelements)
    {
      for (Size i = 0; i < ins.size(); ++i)
      {
        out_map.getFileDescriptions()[i].filename = ins[i];
        out_map.getFileDescriptions()[i].size = maps[i].size();
        out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
      }
    }
    else
    {
      // components of the output map are not the input maps themselves, but
      // the components of the input maps:
      algorithm->transferSubelements(maps, out_map);
    }
  }

  // assign unique ids
  out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // annotate output with data processing info
  addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

  out_map.setPrimaryMSRunPath(ms_run_locations);

  // write output
  ConsensusXMLFile().store(out, out_map);

  // some statistics
  map<Size, UInt> num_consfeat_of_size;
  for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit)
  {
    ++num_consfeat_of_size[cmit->size()];
  }

  LOG_INFO << "Number of consensus features:" << endl;
  for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
  {
    LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
  }
  LOG_INFO << " total: " << setw(6) << out_map.size() << endl;

  delete algorithm;
  return EXECUTION_OK;
}
/// @brief extracts the iTRAQ channels from the MS data and stores intensity values in a consensus map
///
/// @param ms_exp_data Raw data to read
/// @param consensus_map Output: each MS² scan as a consensus feature
/// @throws Exception::MissingInformation if no scans are present or an MS² scan has no precursor
void ItraqChannelExtractor::run(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map)
{
  if (ms_exp_data.empty())
  {
    LOG_WARN << "The given file does not contain any conventional peak data, but might"
                " contain chromatograms. This tool currently cannot handle them, sorry.";
    throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!");
  }

  MSExperiment<> ms_exp_MS2;
  String mode = (String) param_.getValue("select_activation");
  std::cout << "Selecting scans with activation mode: " << (mode == "" ? "any" : mode) << "\n";
  HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(ListUtils::create<String>(mode));

  for (size_t idx = 0; idx < ms_exp_data.size(); ++idx)
  {
    if (ms_exp_data[idx].getMSLevel() == 2)
    {
      if (mode == "" || activation_predicate(ms_exp_data[idx]))
      {
        // copy only MS² scans
        ms_exp_MS2.addSpectrum(ms_exp_data[idx]);
      }
      else
      {
        //std::cout << "deleting spectrum # " << idx << " with RT: " << ms_exp_data[idx].getRT() << "\n";
      }
    }
  }

#ifdef ITRAQ_DEBUG
  std::cout << "we have " << ms_exp_MS2.size() << " scans left of level " << ms_exp_MS2[0].getMSLevel() << std::endl;
  std::cout << "run: channel_map_ has " << channel_map_.size() << " entries!" << std::endl;
#endif

  consensus_map.clear(false);

  // set <mapList> header
  Int index_cnt = 0;
  for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it)
  {
    // structure of the map entry cm_it:
    // first  == channel name as Int, e.g. 114
    // second == ChannelInfo struct
    ConsensusMap::FileDescription channel_as_map;
    // label is the channel + description provided in the Params
    if (itraq_type_ != TMT_SIXPLEX)
      channel_as_map.label = "iTRAQ_" + String(cm_it->second.name) + "_" + String(cm_it->second.description);
    else
      channel_as_map.label = "TMT_" + String(cm_it->second.name) + "_" + String(cm_it->second.description);

    channel_as_map.size = ms_exp_MS2.size();
    // TODO: what about .filename? leave empty?

    // add some more MetaInfo
    channel_as_map.setMetaValue("channel_name", cm_it->second.name);
    channel_as_map.setMetaValue("channel_id", cm_it->second.id);
    channel_as_map.setMetaValue("channel_description", cm_it->second.description);
    channel_as_map.setMetaValue("channel_center", cm_it->second.center);
    channel_as_map.setMetaValue("channel_active", String(cm_it->second.active ? "true" : "false"));
    consensus_map.getFileDescriptions()[index_cnt++] = channel_as_map;
  }

  // create consensus elements
  Peak2D::CoordinateType allowed_deviation = (Peak2D::CoordinateType) param_.getValue("reporter_mass_shift");

  // now we have picked data
  // --> assign peaks to channels
  UInt element_index(0);
  for (MSExperiment<>::ConstIterator it = ms_exp_MS2.begin(); it != ms_exp_MS2.end(); ++it)
  {
    // store RT & m/z of the parent ion as centroid of the ConsensusFeature
    ConsensusFeature cf;
    cf.setUniqueId();
    cf.setRT(it->getRT());
    if (it->getPrecursors().size() >= 1)
    {
      cf.setMZ(it->getPrecursors()[0].getMZ());
    }
    else
    {
      throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                          String("No precursor information given for scan native ID ") + String(it->getNativeID()) + " with RT " + String(it->getRT()));
    }

    Peak2D channel_value;
    channel_value.setRT(it->getRT());
    // for each channel
    Int index = 0;
    Peak2D::IntensityType overall_intensity = 0;
    for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it)
    {
      // set the m/z position of the channel
      channel_value.setMZ(cm_it->second.center);
      // reset intensity
      channel_value.setIntensity(0);

      // add up all signals within the allowed deviation around the channel center
      for (MSExperiment<>::SpectrumType::ConstIterator mz_it = it->MZBegin(cm_it->second.center - allowed_deviation);
           mz_it != it->MZEnd(cm_it->second.center + allowed_deviation);
           ++mz_it)
      {
        channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity());
      }

      overall_intensity += channel_value.getIntensity();

      // add the channel to the ConsensusFeature
      cf.insert(index++, channel_value, element_index);
    } // ! channel iterator

    // flag the consensus feature if all feature handles are empty
    if (overall_intensity == 0)
    {
      cf.setMetaValue("all_empty", String("true"));
    }
    cf.setIntensity(overall_intensity);
    consensus_map.push_back(cf); // the tandem scans in the order they appear in the experiment

    ++element_index;
  } // ! experiment iterator

#ifdef ITRAQ_DEBUG
  std::cout << "processed " << element_index << " scans" << std::endl;
#endif

  consensus_map.setExperimentType("itraq");
  return;
}
ExitCodes outputTo(ostream& os)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  // file names
  String in = getStringOption_("in");

  // file type
  FileHandler fh;
  FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

  if (in_type == FileTypes::UNKNOWN)
  {
    in_type = fh.getType(in);
    writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
  }

  if (in_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine input file type!");
    return PARSE_ERROR;
  }

  MSExperiment<Peak1D> exp;
  FeatureMap feat;
  ConsensusMap cons;

  if (in_type == FileTypes::FEATUREXML) // features
  {
    FeatureXMLFile().load(in, feat);
    feat.updateRanges();
  }
  else if (in_type == FileTypes::CONSENSUSXML) // consensus features
  {
    ConsensusXMLFile().load(in, cons);
    cons.updateRanges();
  }

  //-------------------------------------------------------------
  // meta information
  //-------------------------------------------------------------
  if (getFlag_("m"))
  {
    // basic info
    os << endl
       << "-- General information --" << endl
       << endl
       << "file name: " << in << endl
       << "file type: " << FileTypes::typeToName(in_type) << endl;

    os << endl << "-- Meta information --" << endl << endl;

    if (in_type == FileTypes::FEATUREXML) // features
    {
      os << "Document id : " << feat.getIdentifier() << endl << endl;
    }
    else if (in_type == FileTypes::CONSENSUSXML) // consensus features
    {
      os << "Document id : " << cons.getIdentifier() << endl << endl;
    }
  }

  //-------------------------------------------------------------
  // data processing
  //-------------------------------------------------------------
  if (getFlag_("p"))
  {
    // basic info
    os << endl << "-- Data processing information --" << endl << endl;

    // get data processing info
    vector<DataProcessing> dp;
    if (in_type == FileTypes::FEATUREXML) // features
    {
      dp = feat.getDataProcessing();
    }
    else if (in_type == FileTypes::CONSENSUSXML) // consensus features
    {
      dp = cons.getDataProcessing();
    }

    int i = 0;
    for (vector<DataProcessing>::iterator it = dp.begin(); it != dp.end(); ++it)
    {
      os << "Data processing " << i << endl;
      os << "\tcompletion_time: " << (*it).getCompletionTime().getDate() << 'T' << (*it).getCompletionTime().getTime() << endl;
      os << "\tsoftware name: " << (*it).getSoftware().getName() << " version " << (*it).getSoftware().getVersion() << endl;
      for (set<DataProcessing::ProcessingAction>::const_iterator paIt = (*it).getProcessingActions().begin();
           paIt != (*it).getProcessingActions().end(); ++paIt)
      {
        os << "\t\tprocessing action: " << DataProcessing::NamesOfProcessingAction[*paIt] << endl;
      }
      ++i; // note: this increment originally sat outside the loop, so every entry was printed as "Data processing 0"
    }
  }

  //-------------------------------------------------------------
  // statistics
  //-------------------------------------------------------------
  if (getFlag_("s"))
  {
    //-------------------------------------------------------------
    // content statistics
    //-------------------------------------------------------------
    Map<String, int> meta_names;
    if (in_type == FileTypes::FEATUREXML) // features
    {
      os << "Number of features: " << feat.size() << endl
         << endl
         << "Ranges:" << endl
         << " retention time: " << String::number(feat.getMin()[Peak2D::RT], 2) << " : " << String::number(feat.getMax()[Peak2D::RT], 2) << endl
         << " mass-to-charge: " << String::number(feat.getMin()[Peak2D::MZ], 2) << " : " << String::number(feat.getMax()[Peak2D::MZ], 2) << endl
         << " intensity: " << String::number(feat.getMinInt(), 2) << " : " << String::number(feat.getMaxInt(), 2) << endl
         << endl;

      // charge distribution
      Map<UInt, UInt> charges;
      for (Size i = 0; i < feat.size(); ++i)
      {
        charges[feat[i].getCharge()]++;
      }
      os << "Charge distribution" << endl;
      for (Map<UInt, UInt>::const_iterator it = charges.begin(); it != charges.end(); ++it)
      {
        os << "charge " << it->first << ": " << it->second << endl;
      }
    }
    else if (in_type == FileTypes::CONSENSUSXML) // consensus features
    {
      map<Size, UInt> num_consfeat_of_size;
      for (ConsensusMap::const_iterator cmit = cons.begin(); cmit != cons.end(); ++cmit)
      {
        ++num_consfeat_of_size[cmit->size()];
      }

      os << endl << "Number of consensus features:" << endl;
      for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
      {
        os << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
      }
      os << " total: " << setw(6) << cons.size() << endl << endl;

      os << "Ranges:" << endl
         << " retention time: " << String::number(cons.getMin()[Peak2D::RT], 2) << " : " << String::number(cons.getMax()[Peak2D::RT], 2) << endl
         << " mass-to-charge: " << String::number(cons.getMin()[Peak2D::MZ], 2) << " : " << String::number(cons.getMax()[Peak2D::MZ], 2) << endl
         << " intensity: " << String::number(cons.getMinInt(), 2) << " : " << String::number(cons.getMaxInt(), 2) << endl;

      // file descriptions
      const ConsensusMap::FileDescriptions& descs = cons.getFileDescriptions();
      if (!descs.empty())
      {
        os << endl << "File descriptions:" << endl;
        for (ConsensusMap::FileDescriptions::const_iterator it = descs.begin(); it != descs.end(); ++it)
        {
          os << " - " << it->second.filename << endl
             << " identifier: " << it->first << endl
             << " label : " << it->second.label << endl
             << " size : " << it->second.size << endl;
        }
      }
    }

    os << endl << "-- Summary Statistics --" << endl << endl;
  }

  if (in_type == FileTypes::FEATUREXML) // features
  {
    feat.sortByRT();

    vector<double> slice_stats;
    Size n = getIntOption_("n");

    Size begin = 0;
    Size end = 0;
    os << "#slice\tRT_begin\tRT_end\tnumber_of_features\ttic\t"
       << "int_mean\tint_stddev\tint_min\tint_max\tint_median\tint_lowerq\tint_upperq\t"
       << "mz_mean\tmz_stddev\tmz_min\tmz_max\tmz_median\tmz_lowerq\tmz_upperq\t"
       << "width_mean\twidth_stddev\twidth_min\twidth_max\twidth_median\twidth_lowerq\twidth_upperq\t"
       << "qual_mean\tqual_stddev\tqual_min\tqual_max\tqual_median\tqual_lowerq\tqual_upperq\t"
       << "rt_qual_mean\trt_qual_stddev\trt_qual_min\trt_qual_max\trt_qual_median\trt_qual_lowerq\trt_qual_upperq\t"
       << "mz_qual_mean\tmz_qual_stddev\tmz_qual_min\tmz_qual_max\tmz_qual_median\tmz_qual_lowerq\tmz_qual_upperq"
       << endl;

    double rt_begin = 0.0;
    for (Size slice = 0; slice < n; ++slice)
    {
      // determine the slice boundaries
      double rt_end = feat.back().getRT() / (double)n * (slice + 1);
      for (end = begin; end < feat.size() && feat[end].getRT() < rt_end; ++end) {}

      // compute statistics on all features in this slice
      slice_stats = sliceStatistics(feat, begin, end);

      // write the beginning and end of the slice to the output, as well as the slice index
      os << slice << "\t" << rt_begin << "\t" << rt_end << "\t" << end - begin << "\t";

      // write the statistics as a line of a CSV file
      copy(slice_stats.begin(), slice_stats.end(), ostream_iterator<double>(os, "\t"));
      os << endl;

      begin = end;
      rt_begin = rt_end;
    }
  }
  else if (in_type == FileTypes::CONSENSUSXML) // consensus features
  {
    Size size = cons.size();

    // Note: these vectors are filled with push_back below, so they must start
    // empty; the original constructed qualities/widths with `size` elements,
    // which would have prepended `size` zeros to the statistics.
    vector<double> intensities;
    intensities.reserve(size);
    vector<double> qualities;
    qualities.reserve(size);
    vector<double> widths;
    widths.reserve(size);

    vector<double> rt_delta_by_elems;
    vector<double> rt_aad_by_elems;
    vector<double> rt_aad_by_cfs;
    rt_aad_by_cfs.reserve(size);

    vector<double> mz_delta_by_elems;
    vector<double> mz_aad_by_elems;
    vector<double> mz_aad_by_cfs;
    mz_aad_by_cfs.reserve(size);

    vector<double> it_delta_by_elems;
    vector<double> it_aad_by_elems;
    vector<double> it_aad_by_cfs;
    it_aad_by_cfs.reserve(size);

    for (ConsensusMap::const_iterator cm_iter = cons.begin(); cm_iter != cons.end(); ++cm_iter)
    {
      double rt_aad = 0;
      double mz_aad = 0;
      double it_aad = 0;
      intensities.push_back(cm_iter->getIntensity());
      qualities.push_back(cm_iter->getQuality());
      widths.push_back(cm_iter->getWidth());
      for (ConsensusFeature::HandleSetType::const_iterator hs_iter = cm_iter->begin(); hs_iter != cm_iter->end(); ++hs_iter)
      {
        double rt_diff = hs_iter->getRT() - cm_iter->getRT();
        rt_delta_by_elems.push_back(rt_diff);
        if (rt_diff < 0)
        {
          rt_diff = -rt_diff;
        }
        rt_aad_by_elems.push_back(rt_diff);
        rt_aad += rt_diff;

        double mz_diff = hs_iter->getMZ() - cm_iter->getMZ();
        mz_delta_by_elems.push_back(mz_diff);
        if (mz_diff < 0)
        {
          mz_diff = -mz_diff;
        }
        mz_aad_by_elems.push_back(mz_diff);
        mz_aad += mz_diff;

        double it_ratio = hs_iter->getIntensity() / (cm_iter->getIntensity() ? cm_iter->getIntensity() : 1.);
        it_delta_by_elems.push_back(it_ratio);
        if (it_ratio < 1.)
        {
          it_ratio = 1. / it_ratio;
        }
        it_aad_by_elems.push_back(it_ratio);
        it_aad += it_ratio;
      }
      if (!cm_iter->empty())
      {
        rt_aad /= cm_iter->size();
        mz_aad /= cm_iter->size();
        it_aad /= cm_iter->size();
      } // otherwise rt_aad etc. are 0 anyway
      rt_aad_by_cfs.push_back(rt_aad);
      mz_aad_by_cfs.push_back(mz_aad);
      it_aad_by_cfs.push_back(it_aad);
    }

    OpenMS::SomeStatistics some_statistics;
    os.precision(writtenDigits(ConsensusFeature::IntensityType()));
    os << "Intensities of consensus features:" << endl << some_statistics(intensities) << endl;

    os.precision(writtenDigits(ConsensusFeature::QualityType()));
    os << "Qualities of consensus features:" << endl << some_statistics(qualities) << endl;

    os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
    os << "Retention time differences ( element-center, weight 1 per element):" << endl << some_statistics(rt_delta_by_elems) << endl;
    os << "Absolute retention time differences ( |element-center|, weight 1 per element):" << endl << some_statistics(rt_aad_by_elems) << endl;
    os << "Average absolute differences of retention time within consensus features ( |element-center|, weight 1 per consensus feature):" << endl << some_statistics(rt_aad_by_cfs) << endl;

    os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
    os << "Mass-to-charge differences ( element-center, weight 1 per element):" << endl << some_statistics(mz_delta_by_elems) << endl;
    os << "Absolute differences of mass-to-charge ( |element-center|, weight 1 per element):" << endl << some_statistics(mz_aad_by_elems) << endl;
    os << "Average absolute differences of mass-to-charge within consensus features ( |element-center|, weight 1 per consensus feature):" << endl << some_statistics(mz_aad_by_cfs) << endl;

    os.precision(writtenDigits(ConsensusFeature::IntensityType()));
    os << "Intensity ratios ( element/center, weight 1 per element):" << endl << some_statistics(it_delta_by_elems) << endl;
    os << "Relative intensity error ( max{(element/center),(center/element)}, weight 1 per element):" << endl << some_statistics(it_aad_by_elems) << endl;
    os << "Average relative intensity error within consensus features ( max{(element/center),(center/element)}, weight 1 per consensus feature):" << endl << some_statistics(it_aad_by_cfs) << endl;
  }

  return EXECUTION_OK;
}
pItraq.setValue("channel_116_description", "else");
q_method->setParameters(pItraq);

IsobaricChannelExtractor ice(q_method);

// disable activation filtering
Param p = ice.getParameters();
p.setValue("select_activation", "");
ice.setParameters(p);

// extract channels
ConsensusMap cm_out;
ice.extractChannels(exp, cm_out);

// check channel meta information
TEST_EQUAL(cm_out.getFileDescriptions().size(), 4)
ABORT_IF(cm_out.getFileDescriptions().size() != 4)

TEST_EQUAL(cm_out.getFileDescriptions()[0].label, "itraq4plex_114")
TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_name"), 114)
TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_id"), 0)
TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_description"), "ref")
TEST_EQUAL(cm_out.getFileDescriptions()[0].getMetaValue("channel_center"), 114.1112)

TEST_EQUAL(cm_out.getFileDescriptions()[1].label, "itraq4plex_115")
TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_name"), 115)
TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_id"), 1)
TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_description"), "something")
TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_center"), 115.1082)

TEST_EQUAL(cm_out.getFileDescriptions()[2].label, "itraq4plex_116")
void LabeledPairFinder::run(const vector<ConsensusMap>& input_maps, ConsensusMap& result_map)
{
  if (input_maps.size() != 1)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "exactly one input map required");
  if (result_map.getFileDescriptions().size() != 2)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "two file descriptions required");
  if (result_map.getFileDescriptions().begin()->second.filename != result_map.getFileDescriptions().rbegin()->second.filename)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the two file descriptions have to contain the same file name");

  checkIds_(input_maps);

  // look up the light and heavy index
  Size light_index = numeric_limits<Size>::max();
  Size heavy_index = numeric_limits<Size>::max();
  for (ConsensusMap::FileDescriptions::const_iterator it = result_map.getFileDescriptions().begin();
       it != result_map.getFileDescriptions().end(); ++it)
  {
    if (it->second.label == "heavy")
    {
      heavy_index = it->first;
    }
    else if (it->second.label == "light")
    {
      light_index = it->first;
    }
  }
  if (light_index == numeric_limits<Size>::max() || heavy_index == numeric_limits<Size>::max())
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the input maps have to be labeled 'light' and 'heavy'");
  }

  result_map.clear(false);

  // sort consensus features by RT (and m/z) to speed up searching afterwards
  typedef ConstRefVector<ConsensusMap> RefMap;
  RefMap model_ref(input_maps[0].begin(), input_maps[0].end());
  model_ref.sortByPosition();

  // calculate matches
  ConsensusMap matches;

  // settings
  double rt_pair_dist = param_.getValue("rt_pair_dist");
  double rt_dev_low = param_.getValue("rt_dev_low");
  double rt_dev_high = param_.getValue("rt_dev_high");
  double mz_dev = param_.getValue("mz_dev");
  DoubleList mz_pair_dists = param_.getValue("mz_pair_dists");
  bool mrm = param_.getValue("mrm").toBool();

  // estimate RT parameters
  if (param_.getValue("rt_estimate") == "true")
  {
    // find all possible RT distances of features with the same charge and a good m/z distance
    vector<double> dists;
    dists.reserve(model_ref.size());
    for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
    {
      for (RefMap::const_iterator it2 = model_ref.begin(); it2 != model_ref.end(); ++it2)
      {
        for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
        {
          double mz_pair_dist = *dist_it;
          if (it2->getCharge() == it->getCharge()
             && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev
             && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev)
          {
            dists.push_back(it2->getRT() - it->getRT());
          }
        }
      }
    }
    if (dists.empty())
    {
      cout << "Warning: Could not find pairs for RT distance estimation. The manual settings are used!" << endl;
    }
    else
    {
      if (dists.size() < 50)
      {
        cout << "Warning: Found only " << dists.size() << " pairs. The estimated shift and std deviation are probably not reliable!" << endl;
      }
      //--------------------------- estimate initial parameters of fit ---------------------------
      GaussFitter::GaussFitResult result(-1, -1, -1);
      // first estimate of the optimal shift: median of the distances
      sort(dists.begin(), dists.end());
      Size median_index = dists.size() / 2;
      result.x0 = dists[median_index];
      // create a histogram of the distances,
      // considering only the maximum number of pairs, centered around the optimal shift
      Size max_pairs = model_ref.size() / 2;
      Size start_index = (Size) max((SignedSize)0, (SignedSize)(median_index - max_pairs / 2));
      Size end_index = (Size) min((SignedSize)(dists.size() - 1), (SignedSize)(median_index + max_pairs / 2));
      double start_value = dists[start_index];
      double end_value = dists[end_index];
      double bin_step = fabs(end_value - start_value) / 99.999; // ensure that we have 100 bins
      Math::Histogram<> hist(start_value, end_value, bin_step);
      //std::cout << "HIST from " << start_value << " to " << end_value << " (bin size " << bin_step << ")" << endl;
      for (Size i = start_index; i <= end_index; ++i)
      {
        hist.inc(dists[i]);
      }
      //cout << hist << endl;
      dists.clear();
      // determine the median of the bins (uniform background distribution)
      vector<Size> bins(hist.begin(), hist.end());
      sort(bins.begin(), bins.end());
      Size bin_median = bins[bins.size() / 2];
      bins.clear();
      // estimate scale A: maximum of the histogram
      Size max_value = hist.maxValue();
      result.A = max_value - bin_median;
      // overwrite the estimate of x0 with the position of the highest bin
      for (Size i = 0; i < hist.size(); ++i)
      {
        if (hist[i] == max_value)
        {
          result.x0 = hist.centerOfBin(i);
          break;
        }
      }
      // estimate sigma: first position where the count drops to or below the median count of the histogram
      double pos = result.x0;
      while (pos > start_value && hist.binValue(pos) > bin_median)
      {
        pos -= bin_step;
      }
      double sigma_low = result.x0 - pos;
      pos = result.x0;
      while (pos < end_value && hist.binValue(pos) > bin_median)
      {
        pos += bin_step;
      }
      double sigma_high = pos - result.x0;
      result.sigma = (sigma_high + sigma_low) / 6.0;
      //cout << "estimated optimal RT distance (before fit): " << result.x0 << endl;
      //cout << "estimated allowed deviation (before fit): " << result.sigma*3.0 << endl;

      //--------------------------- do gauss fit ---------------------------
      vector<DPosition<2> > points(hist.size());
      for (Size i = 0; i < hist.size(); ++i)
      {
        points[i][0] = hist.centerOfBin(i);
        points[i][1] = max(0u, hist[i]);
      }
      GaussFitter fitter;
      fitter.setInitialParameters(result);
      result = fitter.fit(points);
      cout << "estimated optimal RT distance: " << result.x0 << endl;
      cout << "estimated allowed deviation: " << fabs(result.sigma) * 3.0 << endl;
      rt_pair_dist = result.x0;
      rt_dev_low = fabs(result.sigma) * 3.0;
      rt_dev_high = fabs(result.sigma) * 3.0;
    }
  }

  // check each feature
  for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
  {
    for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
    {
      double mz_pair_dist = *dist_it;
      RefMap::const_iterator it2 = lower_bound(model_ref.begin(), model_ref.end(),
                                               it->getRT() + rt_pair_dist - rt_dev_low,
                                               ConsensusFeature::RTLess());
      while (it2 != model_ref.end() && it2->getRT() <= it->getRT() + rt_pair_dist + rt_dev_high)
      {
        // in MRM mode, we need to compare the precursor mass difference and the
        // fragment mass difference; the charge remains the same
        double prec_mz_diff(0);
        if (mrm)
        {
          prec_mz_diff = fabs((double)it2->getMetaValue("MZ") - (double)it->getMetaValue("MZ"));
          if (it->getCharge() != 0)
          {
            prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist / it->getCharge());
          }
          else
          {
            prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist);
          }
        }

        bool mrm_correct_dist(false);
        double frag_mz_diff = fabs(it->getMZ() - it2->getMZ());
        //cerr << it->getRT() << " charge1=" << it->getCharge() << ", charge2=" << it2->getCharge() << ", prec_diff=" << prec_mz_diff << ", frag_diff=" << frag_mz_diff << endl;
        if (mrm && it2->getCharge() == it->getCharge() && prec_mz_diff < mz_dev
           && (frag_mz_diff < mz_dev || fabs(frag_mz_diff - mz_pair_dist) < mz_dev))
        {
          mrm_correct_dist = true;
          //cerr << "mrm_correct_dist" << endl;
        }

        if ((mrm && mrm_correct_dist)
           || (!mrm && it2->getCharge() == it->getCharge()
              && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev
              && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev))
        {
          //cerr << "dist correct" << endl;
          double score = sqrt(PValue_(it2->getMZ() - it->getMZ(), mz_pair_dist / it->getCharge(), mz_dev, mz_dev)
                              * PValue_(it2->getRT() - it->getRT(), rt_pair_dist, rt_dev_low, rt_dev_high));

          // Note: we used to copy the id from the light feature here, but that
          // strategy does not generalize to more than two labels. We might want
          // to report consensus features where the light one is missing but more
          // than one heavier variant was found. Also, the old strategy is
          // inconsistent with what was done in the unlabeled case. Thus we now
          // assign a new unique id here.
          matches.push_back(ConsensusFeature());
          matches.back().setUniqueId();

          matches.back().insert(light_index, *it);
          matches.back().clearMetaInfo();
          matches.back().insert(heavy_index, *it2);
          matches.back().setQuality(score);
          matches.back().setCharge(it->getCharge());
          matches.back().computeMonoisotopicConsensus();
        }
        ++it2;
      }
    }
  }

  // compute the best pairs:
  // - sort matches by quality
  // - take the highest-quality matches first (greedy) and mark their features as used
  set<Size> used_features;
  matches.sortByQuality(true);
  for (ConsensusMap::const_iterator match = matches.begin(); match != matches.end(); ++match)
  {
    // check that the features are not used yet
    if (used_features.find(match->begin()->getUniqueId()) == used_features.end()
       && used_features.find(match->rbegin()->getUniqueId()) == used_features.end())
    {
      // if unused, add the match to the final set of elements
      result_map.push_back(*match);
      used_features.insert(match->begin()->getUniqueId());
      used_features.insert(match->rbegin()->getUniqueId());
    }
  }

  // add protein identifications to the result map
  for (Size i = 0; i < input_maps.size(); ++i)
  {
    result_map.getProteinIdentifications().insert(result_map.getProteinIdentifications().end(),
                                                  input_maps[i].getProteinIdentifications().begin(),
                                                  input_maps[i].getProteinIdentifications().end());
  }

  // add unassigned peptide identifications to the result map
  for (Size i = 0; i < input_maps.size(); ++i)
  {
    result_map.getUnassignedPeptideIdentifications().insert(result_map.getUnassignedPeptideIdentifications().end(),
                                                            input_maps[i].getUnassignedPeptideIdentifications().begin(),
                                                            input_maps[i].getUnassignedPeptideIdentifications().end());
  }

  // very useful for checking the results, and the ids have no real meaning anyway
  result_map.sortByMZ();
}
void EDTAFile::load(const String& filename, ConsensusMap& consensus_map)
{
  // load input
  TextFile input(filename);
  TextFile::ConstIterator input_it = input.begin();

  // reset map
  consensus_map = ConsensusMap();
  consensus_map.setUniqueId();

  char separator = ' ';
  if (input_it->hasSubstring("\t")) separator = '\t';
  else if (input_it->hasSubstring(" ")) separator = ' ';
  else if (input_it->hasSubstring(",")) separator = ',';

  // parse the header line
  std::vector<String> headers;
  input_it->split(separator, headers);
  int offset = 0;
  for (Size i = 0; i < headers.size(); ++i)
  {
    headers[i].trim();
  }
  String header_trimmed = *input.begin();
  header_trimmed.trim();

  enum
  {
    TYPE_UNDEFINED,
    TYPE_OLD_NOCHARGE,
    TYPE_OLD_CHARGE,
    TYPE_CONSENSUS
  } input_type = TYPE_UNDEFINED;
  Size input_features = 1;

  double rt = 0.0;
  double mz = 0.0;
  double it = 0.0;
  Int ch = 0;

  if (headers.size() <= 2)
  {
    throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "' (line 1)\n");
  }
  else if (headers.size() == 3) input_type = TYPE_OLD_NOCHARGE;
  else if (headers.size() == 4) input_type = TYPE_OLD_CHARGE;

  // see if we have a header
  try
  {
    // try to convert... if that fails, it's a header
    rt = headers[0].toDouble();
    mz = headers[1].toDouble();
    it = headers[2].toDouble();
  }
  catch (Exception::BaseException&)
  {
    offset = 1;
    ++input_it;
    LOG_INFO << "Detected a header line.\n";
  }

  if (headers.size() >= 5)
  {
    if (String(headers[4].trim()).toUpper() == "RT1") input_type = TYPE_CONSENSUS;
    else input_type = TYPE_OLD_CHARGE;
  }
  if (input_type == TYPE_CONSENSUS)
  {
    // every consensus-style line includes features with four columns;
    // the remainder is meta data
    input_features = headers.size() / 4;
  }

  if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS))
  {
    throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "' (line 1)\n");
  }

  SignedSize input_size = input.end() - input.begin();

  ConsensusMap::FileDescription desc;
  desc.filename = filename;
  desc.size = (input_size) - offset;
  consensus_map.getFileDescriptions()[0] = desc;

  // parse the features
  consensus_map.reserve(input_size);

  for (; input_it != input.end(); ++input_it)
  {
    // do nothing for empty lines
    String line_trimmed = *input_it;
    line_trimmed.trim();
    if (line_trimmed == "")
    {
      if ((input_it - input.begin()) < input_size - 1)
        LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ").";
      continue;
    }

    // split the line into tokens
    std::vector<String> parts;
    input_it->split(separator, parts);

    // abort if the line does not contain enough fields
    if (parts.size() < 3)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                  String("Failed parsing in line ") + String((input_it - input.begin()) + 1)
                                  + ": At least three columns are needed! (got " + String(parts.size())
                                  + ")\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
    }

    ConsensusFeature cf;
    cf.setUniqueId();

    try
    {
      // convert the values; checkedTo*_ returns -1 if a value is not available
      rt = checkedToDouble_(parts, 0);
      mz = checkedToDouble_(parts, 1);
      it = checkedToDouble_(parts, 2);
      ch = checkedToInt_(parts, 3);

      cf.setRT(rt);
      cf.setMZ(mz);
      cf.setIntensity(it);
      if (input_type != TYPE_OLD_NOCHARGE) cf.setCharge(ch);
    }
    catch (Exception::BaseException&)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                  String("Failed parsing in line ") + String((input_it - input.begin()) + 1)
                                  + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
    }

    // check all features in one line
    for (Size j = 1; j < input_features; ++j)
    {
      try
      {
        Feature f;
        f.setUniqueId();

        // convert the values; checkedTo*_ returns -1 if a value is not available
        rt = checkedToDouble_(parts, j * 4 + 0);
        mz = checkedToDouble_(parts, j * 4 + 1);
        it = checkedToDouble_(parts, j * 4 + 2);
        ch = checkedToInt_(parts, j * 4 + 3);

        // only accept features with at least RT and m/z set
        if (rt != -1 && mz != -1)
        {
          f.setRT(rt);
          f.setMZ(mz);
          f.setIntensity(it);
          f.setCharge(ch);

          cf.insert(j - 1, f);
        }
      }
      catch (Exception::BaseException&)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                    String("Failed parsing in line ") + String((input_it - input.begin()) + 1)
                                    + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1)
                                    + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
      }
    }

    // parse the meta data
    for (Size j = input_features * 4; j < parts.size(); ++j)
    {
      String part_trimmed = parts[j];
      part_trimmed.trim();
      if (part_trimmed != "")
      {
        // check whether the column name is ok
        if (headers.size() <= j || headers[j] == "")
        {
          throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                      String("Error: Missing meta data header for column ") + (j + 1) + "!"
                                      + String("Offending header line: '") + header_trimmed + "' (line 1)");
        }
        // add the meta value
        cf.setMetaValue(headers[j], part_trimmed);
      }
    }

    // insert the feature into the map
    consensus_map.push_back(cf);
  }

  // register the file descriptions
  ConsensusMap::FileDescription fd;
  fd.filename = filename;
  fd.size = consensus_map.size();
  Size maps = std::max(input_features - 1, Size(1)); // it's either a simple feature file or a consensus map
                                                     // (in the latter case 'input_features' includes the centroid, which we do not count)
  for (Size i = 0; i < maps; ++i)
  {
    fd.label = String("EDTA_Map ") + String(i);
    consensus_map.getFileDescriptions()[i] = fd;
  }
}
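// Sketch (made-up file name and values): a minimal tab-separated EDTA file with
// a header line and two 4-column data lines (RT, m/z, intensity, charge), i.e.
// the TYPE_OLD_CHARGE branch above.
//
//   contents of "example.edta" (tab-separated):
//     RT      m/z     intensity   charge
//     100.0   500.25  100000      2
//     200.0   600.50  200000      3
void sketchLoadEdta()
{
  EDTAFile edta;
  ConsensusMap cm;
  edta.load("example.edta", cm);
  // cm.size() == 2; the header line was detected because "RT" is not numeric,
  // and each consensus feature got RT, m/z, intensity and charge set.
}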
START_SECTION((void load(const String &filename, ConsensusMap & map)))
ConsensusMap map;
ConsensusXMLFile file;
file.load(OPENMS_GET_TEST_DATA_PATH("ConsensusXMLFile_1.consensusXML"), map);

// test DocumentIdentifier addition
TEST_STRING_EQUAL(map.getLoadedFilePath(), OPENMS_GET_TEST_DATA_PATH("ConsensusXMLFile_1.consensusXML"));
TEST_STRING_EQUAL(FileTypes::typeToName(map.getLoadedFileType()), "consensusXML");

// meta data
TEST_EQUAL(map.getIdentifier(), "lsid")
TEST_EQUAL(map.getExperimentType() == "label-free", true)
TEST_EQUAL(map.getMetaValue("name1") == DataValue("value1"), true)
TEST_EQUAL(map.getMetaValue("name2") == DataValue(2), true)

// file descriptions
TEST_EQUAL(map.getFileDescriptions()[0].filename == "data/MapAlignmentFeatureMap1.xml", true)
TEST_EQUAL(map.getFileDescriptions()[0].label, "label")
TEST_EQUAL(map.getFileDescriptions()[0].size, 144)
TEST_EQUAL(map.getFileDescriptions()[0].getMetaValue("name3") == DataValue("value3"), true)
TEST_EQUAL(map.getFileDescriptions()[0].getMetaValue("name4") == DataValue(4), true)
TEST_STRING_EQUAL(map.getFileDescriptions()[1].filename, "data/MapAlignmentFeatureMap2.xml")
TEST_EQUAL(map.getFileDescriptions()[1].label, "")
TEST_EQUAL(map.getFileDescriptions()[1].size, 0)
TEST_EQUAL(map.getFileDescriptions()[1].getMetaValue("name5") == DataValue("value5"), true)
TEST_EQUAL(map.getFileDescriptions()[1].getMetaValue("name6") == DataValue(6.0), true)

// data processing
TEST_EQUAL(map.getDataProcessing().size(), 2)
TEST_STRING_EQUAL(map.getDataProcessing()[0].getSoftware().getName(), "Software1")
TEST_STRING_EQUAL(map.getDataProcessing()[0].getSoftware().getVersion(), "0.91a")
TEST_EQUAL(map.getDataProcessing()[0].getProcessingActions().size(), 1)
TEST_EQUAL(map.getDataProcessing()[0].getProcessingActions().count(DataProcessing::DEISOTOPING), 1)
FGA* nullPointer = 0;

START_SECTION((FeatureGroupingAlgorithm()))
ptr = new FGA();
TEST_NOT_EQUAL(ptr, nullPointer)
END_SECTION

START_SECTION((virtual ~FeatureGroupingAlgorithm()))
delete ptr;
END_SECTION

START_SECTION((virtual void group(const vector< FeatureMap > &maps, ConsensusMap &out)=0))
FGA fga;
vector<FeatureMap> in;
ConsensusMap map;
fga.group(in, map);
TEST_EQUAL(map.getFileDescriptions()[0].filename, "bla")
END_SECTION

START_SECTION((static void registerChildren()))
{
  TEST_STRING_EQUAL(Factory<FeatureGroupingAlgorithm>::registeredProducts()[0], FeatureGroupingAlgorithmLabeled::getProductName());
  TEST_STRING_EQUAL(Factory<FeatureGroupingAlgorithm>::registeredProducts()[1], FeatureGroupingAlgorithmUnlabeled::getProductName());
  TEST_EQUAL(Factory<FeatureGroupingAlgorithm>::registeredProducts().size(), 3)
}
END_SECTION

START_SECTION((void transferSubelements(const vector<ConsensusMap>& maps, ConsensusMap& out) const))
{
  vector<ConsensusMap> maps(2);
  maps[0].getFileDescriptions()[0].filename = "file1";
  maps[0].getFileDescriptions()[0].size = 1;
void group(const vector<FeatureMap>&, ConsensusMap& map)
{
  map.getFileDescriptions()[0].filename = "bla";
  map.getFileDescriptions()[0].size = 5;
}
void ProteinInference::infer_(ConsensusMap& consensus_map, const size_t protein_identification_index, const UInt reference_map)
{
  ProteinIdentification& protein_ident = consensus_map.getProteinIdentifications()[protein_identification_index];

  for (size_t i = 0; i < protein_ident.getHits().size(); ++i)
  {
    // protein accession
    String accession = protein_ident.getHits()[i].getAccession();

    // consensus feature -> peptide hit
    Map<size_t, PeptideHit> consensus_to_peptide;

    // search for it in consensus elements:
    for (size_t i_cm = 0; i_cm < consensus_map.size(); ++i_cm)
    {
      std::vector<PeptideHit> peptide_hits;
      for (std::vector<PeptideIdentification>::iterator it_pepid = consensus_map[i_cm].getPeptideIdentifications().begin();
           it_pepid != consensus_map[i_cm].getPeptideIdentifications().end();
           ++it_pepid)
      {
        // are Protein- and PeptideIdentification from the same search engine run?
        if (it_pepid->getIdentifier() != protein_ident.getIdentifier()) continue;

        std::vector<PeptideHit> peptide_hits_local;
        it_pepid->getReferencingHits(accession, peptide_hits_local);

        if (peptide_hits_local.empty()) continue;

        if (sortByUnique_(peptide_hits_local, it_pepid->isHigherScoreBetter())) // we found a unique peptide
        {
          peptide_hits.push_back(peptide_hits_local[0]);
        }
      }

      // if several PeptideIdentifications (== spectra) were assigned to the current
      // ConsensusElement, take the best one (as above); e.g. in SILAC this could happen
      // TODO: better idea?
      if (!peptide_hits.empty())
      {
        if (sortByUnique_(peptide_hits, consensus_map[i_cm].getPeptideIdentifications()[0].isHigherScoreBetter())) // found a unique peptide for the current ConsensusElement
        {
          consensus_to_peptide[i_cm] = peptide_hits[0];
#ifdef DEBUG_INFERENCE
          std::cout << "assign peptide " << peptide_hits[0].getSequence() << " to Protein " << accession << std::endl;
#endif
        }
      }
    } // ! ConsensusMap loop

    // no peptides found that match the current protein
    if (consensus_to_peptide.empty()) continue;

    // use all matching ConsensusElements to derive a quantitation for the current
    // protein: build up ratios for every map vs. the reference map
    double coverage = 0;
    Map<Size, std::vector<IntensityType> > ratios;

    // number of unique peptides pointing to the current protein
    UInt coverage_count = (UInt)consensus_to_peptide.size();

    for (Map<size_t, PeptideHit>::iterator it_pephits = consensus_to_peptide.begin();
         it_pephits != consensus_to_peptide.end();
         ++it_pephits)
    {
      coverage += it_pephits->second.getSequence().size();
      const ConsensusFeature::HandleSetType& handles = consensus_map[it_pephits->first].getFeatures();

      // search whether the reference map is present
      ConsensusFeature::HandleSetType::const_iterator it_ref = handles.end();
      for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin(); it != handles.end(); ++it)
      {
        if (it->getMapIndex() == reference_map)
        {
          it_ref = it;
          break;
        }
      }

      // did not find a reference
      // TODO: assume intensity == 0 instead?
      if (it_ref == handles.end()) continue;

      for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin(); it != handles.end(); ++it)
      {
        ratios[it->getMapIndex()].push_back(it->getIntensity() / it_ref->getIntensity());
      }
    }

    // sort ratios map-wise and take the median
    for (ConsensusMap::FileDescriptions::const_iterator it_file = consensus_map.getFileDescriptions().begin();
         it_file != consensus_map.getFileDescriptions().end();
         ++it_file)
    {
      if (ratios.has(it_file->first))
      {
        // sort intensity ratios for map #it_file->first
        std::sort(ratios[it_file->first].begin(), ratios[it_file->first].end());
        // take the median
        IntensityType protein_ratio = ratios[it_file->first][ratios[it_file->first].size() / 2];
        // TODO: if the ratios have high variance, emit a warning!
        protein_ident.getHits()[i].setMetaValue(String("ratio_") + String(it_file->first), protein_ratio);
      }
    } // ! map loop

    // % coverage of the protein by peptides
    coverage /= DoubleReal(protein_ident.getHits()[i].getSequence().size()) / 100;

    protein_ident.getHits()[i].setMetaValue("coverage", coverage);
    protein_ident.getHits()[i].setMetaValue("hits", coverage_count);
  } // ! protein loop

  // consensus_to_peptide now contains the Protein -> Peptides mapping
  // let's estimate the
}
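// Standalone sketch of the per-map ratio aggregation used in infer_() above --
// a hypothetical helper on plain std:: types rather than OpenMS' Map and
// IntensityType, added here only to isolate the median step.
#include <algorithm>
#include <vector>

double medianRatio(std::vector<double> ratios)
{
  // mirrors infer_(): sort the ratios, then take the element at size() / 2;
  // for an even-sized vector this picks the upper median. Assumes a non-empty
  // input, as guaranteed by the ratios.has() check in infer_() above.
  std::sort(ratios.begin(), ratios.end());
  return ratios[ratios.size() / 2];
}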
void FeatureGroupingAlgorithmUnlabeled::group(const std::vector<FeatureMap>& maps, ConsensusMap& out)
{
  // check that the number of maps is ok
  if (maps.size() < 2)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "At least two maps must be given!");
  }

  // define the reference map (the one with the most peaks)
  Size reference_map_index = 0;
  Size max_count = 0;
  for (Size m = 0; m < maps.size(); ++m)
  {
    if (maps[m].size() > max_count)
    {
      max_count = maps[m].size();
      reference_map_index = m;
    }
  }

  std::vector<ConsensusMap> input(2);

  // build a consensus map of the elements of the reference map (contains only singleton consensus elements)
  MapConversion::convert(reference_map_index, maps[reference_map_index], input[0]);

  // loop over all other maps, extend the groups
  StablePairFinder pair_finder;
  pair_finder.setParameters(param_.copy("", true));

  for (Size i = 0; i < maps.size(); ++i)
  {
    if (i != reference_map_index)
    {
      MapConversion::convert(i, maps[i], input[1]);
      // compute the consensus of the reference map and map i
      ConsensusMap result;
      pair_finder.run(input, result);
      input[0].swap(result);
    }
  }

  // replace result with temporary map
  out.swap(input[0]);
  // restore the file descriptions of 'out' (the swap moved them into input[0])
  out.getFileDescriptions() = input[0].getFileDescriptions();

  // add protein IDs and unassigned peptide IDs to the result map here,
  // to keep the same order as the input maps (useful for output later):
  for (std::vector<FeatureMap>::const_iterator map_it = maps.begin(); map_it != maps.end(); ++map_it)
  {
    // add protein identifications to the result map
    out.getProteinIdentifications().insert(
      out.getProteinIdentifications().end(),
      map_it->getProteinIdentifications().begin(),
      map_it->getProteinIdentifications().end());

    // add unassigned peptide identifications to the result map
    out.getUnassignedPeptideIdentifications().insert(
      out.getUnassignedPeptideIdentifications().end(),
      map_it->getUnassignedPeptideIdentifications().begin(),
      map_it->getUnassignedPeptideIdentifications().end());
  }

  // canonical ordering for checking the results; the ids have no real meaning anyway
#if 1 // the way this was done in DelaunayPairFinder and StablePairFinder
  out.sortByMZ();
#else
  out.sortByQuality();
  out.sortByMaps();
  out.sortBySize();
#endif
  return;
}
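// A minimal usage sketch for the grouping above -- not part of the original
// file. File names are hypothetical; include paths follow the usual OpenMS
// layout, but verify them against your checkout.
#include <OpenMS/ANALYSIS/MAPMATCHING/FeatureGroupingAlgorithmUnlabeled.h>
#include <OpenMS/FORMAT/FeatureXMLFile.h>
#include <OpenMS/KERNEL/ConsensusMap.h>
#include <OpenMS/KERNEL/FeatureMap.h>
#include <vector>

void groupTwoMapsExample()
{
  std::vector<OpenMS::FeatureMap> maps(2);
  OpenMS::FeatureXMLFile fxml;
  fxml.load("run1.featureXML", maps[0]);
  fxml.load("run2.featureXML", maps[1]);

  // the caller sets up the file descriptions on 'out'; group() restores them
  // after its internal swap (see the comment in the implementation above)
  OpenMS::ConsensusMap out;
  out.getFileDescriptions()[0].filename = "run1.featureXML";
  out.getFileDescriptions()[1].filename = "run2.featureXML";

  OpenMS::FeatureGroupingAlgorithmUnlabeled algo;
  algo.group(maps, out); // throws IllegalArgument if fewer than two maps are given
}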
ExitCodes main_(int, const char**)
{
  String in = getStringOption_("in");
  StringList out = getStringList_("out");
  SeedListGenerator seed_gen;
  // results (actually just one result, except for consensusXML input):
  Map<UInt64, SeedListGenerator::SeedList> seed_lists;

  Size num_maps = 0;
  FileTypes::Type in_type = FileHandler::getType(in);

  if (in_type == FileTypes::CONSENSUSXML)
  {
    ConsensusMap consensus;
    ConsensusXMLFile().load(in, consensus);
    num_maps = consensus.getFileDescriptions().size();
    if (out.size() != num_maps)
    {
      writeLog_("Error: expected " + String(num_maps) + " output filenames");
      return ILLEGAL_PARAMETERS;
    }
    seed_gen.generateSeedLists(consensus, seed_lists);
  }
  else if (out.size() > 1)
  {
    writeLog_("Error: expected only one output filename");
    return ILLEGAL_PARAMETERS;
  }
  else if (in_type == FileTypes::MZML)
  {
    MSExperiment<> experiment;
    MzMLFile().load(in, experiment);
    seed_gen.generateSeedList(experiment, seed_lists[0]);
  }
  else if (in_type == FileTypes::IDXML)
  {
    vector<ProteinIdentification> proteins;
    vector<PeptideIdentification> peptides;
    IdXMLFile().load(in, proteins, peptides);
    seed_gen.generateSeedList(peptides, seed_lists[0], getFlag_("use_peptide_mass"));
  }
  else if (in_type == FileTypes::FEATUREXML)
  {
    FeatureMap features;
    FeatureXMLFile().load(in, features);
    seed_gen.generateSeedList(features.getUnassignedPeptideIdentifications(), seed_lists[0]);
  }

  // output:
  num_maps = 0;
  for (Map<UInt64, SeedListGenerator::SeedList>::Iterator it = seed_lists.begin(); it != seed_lists.end(); ++it, ++num_maps)
  {
    FeatureMap features;
    seed_gen.convertSeedList(it->second, features);
    // annotate output with data processing info:
    addDataProcessing_(features, getProcessingInfo_(DataProcessing::DATA_PROCESSING));
    FeatureXMLFile().store(out[num_maps], features);
  }

  return EXECUTION_OK;
}
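// Hedged command-line sketch for this TOPP tool (tool and file names assumed;
// the -in / -out / -use_peptide_mass parameters correspond to the
// getStringOption_ / getStringList_ / getFlag_ calls above):
//
//   SeedListGenerator -in sample.consensusXML -out seeds_1.featureXML seeds_2.featureXML
//   SeedListGenerator -in ids.idXML -out seeds.featureXML -use_peptide_mass
//
// For consensusXML input, one output file per sub-map is expected; all other
// input types produce a single seed list.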