/// @brief extracts the iTRAQ channels from the MS data and stores intensity values in a consensus map /// /// @param ms_exp_data Raw data to read /// @param consensus_map Output each MS² scan as a consensus feature /// @throws Exception::MissingInformation if no scans present or MS² scan has no precursor void ItraqChannelExtractor::run(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map) { if (ms_exp_data.empty()) { LOG_WARN << "The given file does not contain any conventional peak data, but might" " contain chromatograms. This tool currently cannot handle them, sorry."; throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!"); } MSExperiment<> ms_exp_MS2; String mode = (String) param_.getValue("select_activation"); std::cout << "Selecting scans with activation mode: " << (mode == "" ? "any" : mode) << "\n"; HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(ListUtils::create<String>(mode)); for (size_t idx = 0; idx < ms_exp_data.size(); ++idx) { if (ms_exp_data[idx].getMSLevel() == 2) { if (mode == "" || activation_predicate(ms_exp_data[idx])) { // copy only MS² scans ms_exp_MS2.addSpectrum(ms_exp_data[idx]); } else { //std::cout << "deleting spectrum # " << idx << " with RT: " << ms_exp_data[idx].getRT() << "\n"; } } } #ifdef ITRAQ_DEBUG std::cout << "we have " << ms_exp_MS2.size() << " scans left of level " << ms_exp_MS2[0].getMSLevel() << std::endl; std::cout << "run: channel_map_ has " << channel_map_.size() << " entries!" << std::endl; #endif consensus_map.clear(false); // set <mapList> header Int index_cnt = 0; for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it) { // structure of Map cm_it // first == channel-name as Int e.g. 114 // second == ChannelInfo struct ConsensusMap::FileDescription channel_as_map; // label is the channel + description provided in the Params if (itraq_type_ != TMT_SIXPLEX) channel_as_map.label = "iTRAQ_" + String(cm_it->second.name) + "_" + String(cm_it->second.description); else channel_as_map.label = "TMT_" + String(cm_it->second.name) + "_" + String(cm_it->second.description); channel_as_map.size = ms_exp_MS2.size(); //TODO what about .filename? leave empty? // add some more MetaInfo channel_as_map.setMetaValue("channel_name", cm_it->second.name); channel_as_map.setMetaValue("channel_id", cm_it->second.id); channel_as_map.setMetaValue("channel_description", cm_it->second.description); channel_as_map.setMetaValue("channel_center", cm_it->second.center); channel_as_map.setMetaValue("channel_active", String(cm_it->second.active ? "true" : "false")); consensus_map.getFileDescriptions()[index_cnt++] = channel_as_map; } // create consensusElements Peak2D::CoordinateType allowed_deviation = (Peak2D::CoordinateType) param_.getValue("reporter_mass_shift"); // now we have picked data // --> assign peaks to channels UInt element_index(0); for (MSExperiment<>::ConstIterator it = ms_exp_MS2.begin(); it != ms_exp_MS2.end(); ++it) { // store RT&MZ of parent ion as centroid of ConsensusFeature ConsensusFeature cf; cf.setUniqueId(); cf.setRT(it->getRT()); if (it->getPrecursors().size() >= 1) { cf.setMZ(it->getPrecursors()[0].getMZ()); } else { throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + String(it->getNativeID()) + " with RT " + String(it->getRT())); } Peak2D channel_value; channel_value.setRT(it->getRT()); // for each each channel Int index = 0; Peak2D::IntensityType overall_intensity = 0; for (ChannelMapType::const_iterator cm_it = channel_map_.begin(); cm_it != channel_map_.end(); ++cm_it) { // set mz-position of channel channel_value.setMZ(cm_it->second.center); // reset intensity channel_value.setIntensity(0); //add up all signals for (MSExperiment<>::SpectrumType::ConstIterator mz_it = it->MZBegin(cm_it->second.center - allowed_deviation) ; mz_it != it->MZEnd(cm_it->second.center + allowed_deviation) ; ++mz_it ) { channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity()); } overall_intensity += channel_value.getIntensity(); // add channel to ConsensusFeature cf.insert(index++, channel_value, element_index); } // ! channel_iterator // check featureHandles are not empty if (overall_intensity == 0) { cf.setMetaValue("all_empty", String("true")); } cf.setIntensity(overall_intensity); consensus_map.push_back(cf); // the tandem-scan in the order they appear in the experiment ++element_index; } // ! Experiment iterator #ifdef ITRAQ_DEBUG std::cout << "processed " << element_index << " scans" << std::endl; #endif consensus_map.setExperimentType("itraq"); return; }
void IsobaricChannelExtractor::extractChannels(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map) { if (ms_exp_data.empty()) { LOG_WARN << "The given file does not contain any conventional peak data, but might" " contain chromatograms. This tool currently cannot handle them, sorry.\n"; throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!"); } // clear the output map consensus_map.clear(false); consensus_map.setExperimentType("labeled_MS2"); // create predicate for spectrum checking LOG_INFO << "Selecting scans with activation mode: " << (selected_activation_ == "" ? "any" : selected_activation_) << "\n"; HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(StringList::create(selected_activation_)); // now we have picked data // --> assign peaks to channels UInt64 element_index(0); // remember the current precusor spectrum MSExperiment<Peak1D>::ConstIterator prec_spec = ms_exp_data.end(); for (MSExperiment<Peak1D>::ConstIterator it = ms_exp_data.begin(); it != ms_exp_data.end(); ++it) { // remember the last MS1 spectra as we assume it to be the precursor spectrum if (it->getMSLevel() == 1) prec_spec = it; if (selected_activation_ == "" || activation_predicate(*it)) { // check if precursor is available if (it->getPrecursors().empty()) { throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + it->getNativeID() + " with RT " + String(it->getRT())); } // check precursor constraints if (!isValidPrecursor_(it->getPrecursors()[0])) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor doesn't fulfill all constraints." << std::endl; continue; } // check precursor purity if we have a valid precursor .. if (prec_spec != ms_exp_data.end()) { const DoubleReal purity = computePrecursorPurity_(it, prec_spec); if (purity < min_precursor_purity_) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold. [purity = " << purity << "]" << std::endl; continue; } } else { LOG_INFO << "No precursor available for spectrum: " << it->getNativeID() << std::endl; } if (!(prec_spec == ms_exp_data.end()) && computePrecursorPurity_(it, prec_spec) < min_precursor_purity_) { LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold." << std::endl; continue; } // store RT&MZ of parent ion as centroid of ConsensusFeature ConsensusFeature cf; cf.setUniqueId(); cf.setRT(it->getRT()); cf.setMZ(it->getPrecursors()[0].getMZ()); Peak2D channel_value; channel_value.setRT(it->getRT()); // for each each channel UInt64 map_index = 0; Peak2D::IntensityType overall_intensity = 0; for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin(); cl_it != quant_method_->getChannelInformation().end(); ++cl_it) { // set mz-position of channel channel_value.setMZ(cl_it->center); // reset intensity channel_value.setIntensity(0); // as every evaluation requires time, we cache the MZEnd iterator const MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_end = it->MZEnd(cl_it->center + reporter_mass_shift_); // add up all signals for (MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_it = it->MZBegin(cl_it->center - reporter_mass_shift_); mz_it != mz_end; ++mz_it) { channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity()); } // discard contribution of this channel as it is below the required intensity threshold if (channel_value.getIntensity() < min_reporter_intensity_) { channel_value.setIntensity(0); } overall_intensity += channel_value.getIntensity(); // add channel to ConsensusFeature cf.insert(map_index++, channel_value, element_index); } // ! channel_iterator // check if we keep this feature or if it contains low-intensity quantifications if (remove_low_intensity_quantifications_ && hasLowIntensityReporter_(cf)) { continue; } // check featureHandles are not empty if (overall_intensity == 0) { cf.setMetaValue("all_empty", String("true")); } cf.setIntensity(overall_intensity); consensus_map.push_back(cf); // the tandem-scan in the order they appear in the experiment ++element_index; } } // ! Experiment iterator /// add meta information to the map registerChannelsInOutputMap_(consensus_map); }
void EDTAFile::load(const String& filename, ConsensusMap& consensus_map) { // load input TextFile input(filename); TextFile::ConstIterator input_it = input.begin(); // reset map consensus_map = ConsensusMap(); consensus_map.setUniqueId(); char separator = ' '; if (input_it->hasSubstring("\t")) separator = '\t'; else if (input_it->hasSubstring(" ")) separator = ' '; else if (input_it->hasSubstring(",")) separator = ','; // parsing header line std::vector<String> headers; input_it->split(separator, headers); int offset = 0; for (Size i = 0; i < headers.size(); ++i) { headers[i].trim(); } String header_trimmed = *input.begin(); header_trimmed.trim(); enum { TYPE_UNDEFINED, TYPE_OLD_NOCHARGE, TYPE_OLD_CHARGE, TYPE_CONSENSUS } input_type = TYPE_UNDEFINED; Size input_features = 1; double rt = 0.0; double mz = 0.0; double it = 0.0; Int ch = 0; if (headers.size() <= 2) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "' (line 1)\n"); } else if (headers.size() == 3) input_type = TYPE_OLD_NOCHARGE; else if (headers.size() == 4) input_type = TYPE_OLD_CHARGE; // see if we have a header try { // try to convert... if not: thats a header rt = headers[0].toDouble(); mz = headers[1].toDouble(); it = headers[2].toDouble(); } catch (Exception::BaseException&) { offset = 1; ++input_it; LOG_INFO << "Detected a header line.\n"; } if (headers.size() >= 5) { if (String(headers[4].trim()).toUpper() == "RT1") input_type = TYPE_CONSENSUS; else input_type = TYPE_OLD_CHARGE; } if (input_type == TYPE_CONSENSUS) { // Every consensus style line includes features with four columns. // The remainder is meta data input_features = headers.size() / 4; } if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS)) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "' (line 1)\n"); } SignedSize input_size = input.end() - input.begin(); ConsensusMap::FileDescription desc; desc.filename = filename; desc.size = (input_size) - offset; consensus_map.getFileDescriptions()[0] = desc; // parsing features consensus_map.reserve(input_size); for (; input_it != input.end(); ++input_it) { //do nothing for empty lines String line_trimmed = *input_it; line_trimmed.trim(); if (line_trimmed == "") { if ((input_it - input.begin()) < input_size - 1) LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ")."; continue; } //split line to tokens std::vector<String> parts; input_it->split(separator, parts); //abort if line does not contain enough fields if (parts.size() < 3) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": At least three columns are needed! (got " + String(parts.size()) + ")\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n"); } ConsensusFeature cf; cf.setUniqueId(); try { // Convert values. Will return -1 if not available. rt = checkedToDouble_(parts, 0); mz = checkedToDouble_(parts, 1); it = checkedToDouble_(parts, 2); ch = checkedToInt_(parts, 3); cf.setRT(rt); cf.setMZ(mz); cf.setIntensity(it); if (input_type != TYPE_OLD_NOCHARGE) cf.setCharge(ch); } catch (Exception::BaseException&) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n"); } // Check all features in one line for (Size j = 1; j < input_features; ++j) { try { Feature f; f.setUniqueId(); // Convert values. Will return -1 if not available. rt = checkedToDouble_(parts, j * 4 + 0); mz = checkedToDouble_(parts, j * 4 + 1); it = checkedToDouble_(parts, j * 4 + 2); ch = checkedToInt_(parts, j * 4 + 3); // Only accept features with at least RT and MZ set if (rt != -1 && mz != -1) { f.setRT(rt); f.setMZ(mz); f.setIntensity(it); f.setCharge(ch); cf.insert(j - 1, f); } } catch (Exception::BaseException&) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1) + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n"); } } //parse meta data for (Size j = input_features * 4; j < parts.size(); ++j) { String part_trimmed = parts[j]; part_trimmed.trim(); if (part_trimmed != "") { //check if column name is ok if (headers.size() <= j || headers[j] == "") { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Error: Missing meta data header for column ") + (j + 1) + "!" + String("Offending header line: '") + header_trimmed + "' (line 1)"); } //add meta value cf.setMetaValue(headers[j], part_trimmed); } } //insert feature to map consensus_map.push_back(cf); } // register FileDescriptions ConsensusMap::FileDescription fd; fd.filename = filename; fd.size = consensus_map.size(); Size maps = std::max(input_features - 1, Size(1)); // its either a simple feature or a consensus map // (in this case the 'input_features' includes the centroid, which we do not count) for (Size i = 0; i < maps; ++i) { fd.label = String("EDTA_Map ") + String(i); consensus_map.getFileDescriptions()[i] = fd; } }