void BaseLabeler::recomputeConsensus_(const FeatureMapSim & simulated_features) { // iterate over all given features stored in the labeling consensus and try to find the corresponding feature in // in the feature map // build index for faster access Map<String, IntList> id_map; Map<UInt64, Size> features_per_labeled_map; for (Size i = 0; i < simulated_features.size(); ++i) { if (simulated_features[i].metaValueExists("parent_feature")) { LOG_DEBUG << "Checking [" << i << "]: " << simulated_features[i].getPeptideIdentifications()[0].getHits()[0].getSequence().toString() << " with charge " << simulated_features[i].getCharge() << " (" << simulated_features[i].getMetaValue("charge_adducts") << ")" << " parent was " << simulated_features[i].getMetaValue("parent_feature") << std::endl; id_map[simulated_features[i].getMetaValue("parent_feature")].push_back((Int)i); UInt64 map_index = 0; if (simulated_features[i].metaValueExists("map_index")) { map_index = simulated_features[i].getMetaValue("map_index"); } ++features_per_labeled_map[map_index]; } } for (Map<String, IntList>::iterator it = id_map.begin(); it != id_map.end(); ++it) { LOG_DEBUG << it->first << " " << it->second << std::endl; } // new consensus map ConsensusMap new_cm; // initialize submaps in consensus map for (Map<UInt64, Size>::Iterator it = features_per_labeled_map.begin(); it != features_per_labeled_map.end(); ++it) { new_cm.getFileDescriptions()[it->first].size = it->second; new_cm.getFileDescriptions()[it->first].unique_id = simulated_features.getUniqueId(); } for (ConsensusMap::iterator cm_iter = consensus_.begin(); cm_iter != consensus_.end(); ++cm_iter) { bool complete = true; LOG_DEBUG << "Checking consensus feature containing: " << std::endl; // check if we have all elements of current CF in the new feature map (simulated_features) for (ConsensusFeature::iterator cf_iter = (*cm_iter).begin(); cf_iter != (*cm_iter).end(); ++cf_iter) { complete &= id_map.has(String((*cf_iter).getUniqueId())); 
LOG_DEBUG << "\t" << String((*cf_iter).getUniqueId()) << std::endl; } if (complete) { // get all elements sorted by charge state; since the same charge can be achieved by different // adduct compositions we use the adduct-string as indicator to find the groups Map<String, std::set<FeatureHandle, FeatureHandle::IndexLess> > charge_mapping; for (ConsensusFeature::iterator cf_iter = (*cm_iter).begin(); cf_iter != (*cm_iter).end(); ++cf_iter) { IntList feature_indices = id_map[String((*cf_iter).getUniqueId())]; for (IntList::iterator it = feature_indices.begin(); it != feature_indices.end(); ++it) { UInt64 map_index = 0; if (simulated_features[*it].metaValueExists("map_index")) { map_index = simulated_features[*it].getMetaValue("map_index"); } if (charge_mapping.has(simulated_features[*it].getMetaValue("charge_adducts"))) { charge_mapping[simulated_features[*it].getMetaValue("charge_adducts")].insert(FeatureHandle(map_index, simulated_features[*it])); } else { LOG_DEBUG << "Create new set with charge composition " << simulated_features[*it].getMetaValue("charge_adducts") << std::endl; std::set<FeatureHandle, FeatureHandle::IndexLess> fh_set; fh_set.insert(FeatureHandle(map_index, simulated_features[*it])); charge_mapping.insert(std::make_pair(simulated_features[*it].getMetaValue("charge_adducts"), fh_set)); } } } // create new consensus feature from derived features (separated by charge, if charge != 0) for (Map<String, std::set<FeatureHandle, FeatureHandle::IndexLess> >::const_iterator charge_group_it = charge_mapping.begin(); charge_group_it != charge_mapping.end(); ++charge_group_it) { ConsensusFeature cf; cf.setCharge((*(*charge_group_it).second.begin()).getCharge()); cf.setMetaValue("charge_adducts", charge_group_it->first); std::vector<PeptideIdentification> ids; for (std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fh_it = (charge_group_it->second).begin(); fh_it != (charge_group_it->second).end(); ++fh_it) { cf.insert(*fh_it); // append 
identifications Size f_index = simulated_features.uniqueIdToIndex(fh_it->getUniqueId()); std::vector<PeptideIdentification> ids_feature = simulated_features[f_index].getPeptideIdentifications(); ids.insert(ids.end(), ids_feature.begin(), ids_feature.end()); } cf.computeMonoisotopicConsensus(); cf.setPeptideIdentifications(ids); new_cm.push_back(cf); } } } new_cm.setProteinIdentifications(simulated_features.getProteinIdentifications()); consensus_.swap(new_cm); consensus_.applyMemberFunction(&UniqueIdInterface::ensureUniqueId); }
/// Tool entry point: loads the MS level 1 spectra of the mzML file given by option "in",
/// runs MassTraceDetection (and ElutionPeakDetection if "algorithm:epd:enabled" is set) and
/// stores the resulting mass traces as consensusXML or featureXML in the file given by "out".
/// Returns INCOMPATIBLE_INPUT_DATA if no spectra were loaded, EXECUTION_OK otherwise.
ExitCodes main_(int, const char**) override
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  const String input_path = getStringOption_("in");
  const String output_path = getStringOption_("out");

  // an explicitly given output type wins; otherwise derive it from the output file name
  FileTypes::Type output_type = FileTypes::nameToType(getStringOption_("out_type"));
  if (output_type == FileTypes::UNKNOWN)
  {
    output_type = FileHandler().getTypeByFileName(output_path);
  }

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);

  PeakMap peak_map;

  // restrict loading to MS level 1
  std::vector<Int> wanted_ms_levels(1, 1);
  (mzml_file.getOptions()).setMSLevels(wanted_ms_levels);
  mzml_file.load(input_path, peak_map);

  if (peak_map.size() == 0)
  {
    LOG_WARN << "The given file does not contain any conventional peak data, but might"
                " contain chromatograms. This tool currently cannot handle them, sorry.";
    return INCOMPATIBLE_INPUT_DATA;
  }

  // make sure that the spectra are sorted by m/z
  peak_map.sortSpectra(true);

  //-------------------------------------------------------------
  // get params for MTD and EPD algorithms
  //-------------------------------------------------------------
  Param common_param = getParam_().copy("algorithm:common:", true);
  writeDebug_("Common parameters passed to both sub-algorithms (mtd and epd)", common_param, 3);

  Param mtd_param = getParam_().copy("algorithm:mtd:", true);
  writeDebug_("Parameters passed to MassTraceDetection", mtd_param, 3);

  Param epd_param = getParam_().copy("algorithm:epd:", true);
  writeDebug_("Parameters passed to ElutionPeakDetection", epd_param, 3);

  //-------------------------------------------------------------
  // configure and run MTD
  //-------------------------------------------------------------
  MassTraceDetection trace_detector;
  mtd_param.insert("", common_param);
  mtd_param.remove("chrom_fwhm");
  trace_detector.setParameters(mtd_param);

  std::vector<MassTrace> detected_traces;
  trace_detector.run(peak_map, detected_traces);

  //-------------------------------------------------------------
  // optionally run EPD
  //-------------------------------------------------------------
  std::vector<MassTrace> final_traces;

  const bool use_epd = epd_param.getValue("enabled").toBool();
  if (use_epd)
  {
    ElutionPeakDetection peak_detector;

    epd_param.remove("enabled"); // artificially added above
    epd_param.insert("", common_param);
    peak_detector.setParameters(epd_param);

    std::vector<MassTrace> split_traces;
    // note: this step will destroy any meta data annotation (e.g. FWHM_mz_avg)
    peak_detector.detectPeaks(detected_traces, split_traces);

    if (peak_detector.getParameters().getValue("width_filtering") == "auto")
    {
      final_traces.clear();
      peak_detector.filterByPeakWidth(split_traces, final_traces);

      LOG_INFO << "Notice: " << split_traces.size() - final_traces.size()
               << " of total " << split_traces.size()
               << " were dropped because of too low peak width." << std::endl;
    }
    else
    {
      final_traces.swap(split_traces);
    }
  }
  else
  {
    // without EPD the detected traces are used as they are
    final_traces.swap(detected_traces);
  }

  if (output_type == FileTypes::CONSENSUSXML)
  {
    //-------------------------------------------------------------
    // writing consensus map output
    //-------------------------------------------------------------
    ConsensusMap consensus_map;
    StringList ms_runs;
    peak_map.getPrimaryMSRunPath(ms_runs);
    consensus_map.setPrimaryMSRunPath(ms_runs);

    for (MassTrace& trace : final_traces)
    {
      if (trace.getSize() == 0)
      {
        continue;
      }

      // one consensus feature per mass trace; every peak of the trace becomes a handle
      // whose unique id is its 1-based position within the trace
      ConsensusFeature trace_feature;
      int peak_counter = 0;
      for (MassTrace::const_iterator peak_it = trace.begin(); peak_it != trace.end(); ++peak_it)
      {
        FeatureHandle handle;
        handle.setRT(peak_it->getRT());
        handle.setMZ(peak_it->getMZ());
        handle.setIntensity(peak_it->getIntensity());
        handle.setUniqueId(++peak_counter);
        trace_feature.insert(handle);
      }

      trace_feature.setMetaValue(3, trace.getLabel());
      trace_feature.setCharge(0);
      trace_feature.setWidth(trace.estimateFWHM(use_epd));
      trace_feature.setQuality(1 - (1.0 / trace.getSize()));

      trace_feature.setRT(trace.getCentroidRT());
      trace_feature.setMZ(trace.getCentroidMZ());
      trace_feature.setIntensity(trace.getIntensity(false));
      consensus_map.push_back(trace_feature);
    }

    consensus_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);
    addDataProcessing_(consensus_map, getProcessingInfo_(DataProcessing::QUANTITATION));
    consensus_map.setUniqueId();
    ConsensusXMLFile().store(output_path, consensus_map);
  }
  else //(output_type == FileTypes::FEATUREXML)
  {
    //-----------------------------------------------------------
    // convert mass traces to features
    //-----------------------------------------------------------
    std::vector<double> trace_sds;
    FeatureMap feature_map;
    StringList ms_runs;
    peak_map.getPrimaryMSRunPath(ms_runs);
    feature_map.setPrimaryMSRunPath(ms_runs);

    for (MassTrace& trace : final_traces)
    {
      if (trace.getSize() == 0)
      {
        continue;
      }

      trace.updateMeanMZ();
      trace.updateWeightedMZsd();

      Feature feature;
      feature.setMetaValue(3, trace.getLabel());
      feature.setCharge(0);
      feature.setMZ(trace.getCentroidMZ());
      feature.setIntensity(trace.getIntensity(false));
      feature.setRT(trace.getCentroidRT());
      feature.setWidth(trace.estimateFWHM(use_epd));
      feature.setOverallQuality(1 - (1.0 / trace.getSize()));
      feature.getConvexHulls().push_back(trace.getConvexhull());

      const double sd = trace.getCentroidSD();
      feature.setMetaValue("SD", sd);
      feature.setMetaValue("SD_ppm", sd / feature.getMZ() * 1e6);
      if (trace.fwhm_mz_avg > 0)
      {
        feature.setMetaValue("FWHM_mz_avg", trace.fwhm_mz_avg);
      }
      trace_sds.push_back(trace.getCentroidSD());
      feature_map.push_back(feature);
    }

    // print some stats about standard deviation of mass traces
    if (!trace_sds.empty())
    {
      std::sort(trace_sds.begin(), trace_sds.end());
      const Size sd_count = trace_sds.size();
      LOG_INFO << "Mass trace m/z s.d.\n"
               << " low quartile: " << trace_sds[sd_count * 1 / 4] << "\n"
               << " median: " << trace_sds[sd_count * 1 / 2] << "\n"
               << " upp quartile: " << trace_sds[sd_count * 3 / 4] << std::endl;
    }

    feature_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // annotate output with data processing info TODO
    addDataProcessing_(feature_map, getProcessingInfo_(DataProcessing::QUANTITATION));
    //feature_map.setUniqueId();
    FeatureXMLFile().store(output_path, feature_map);
  }

  return EXECUTION_OK;
}
void EDTAFile::load(const String& filename, ConsensusMap& consensus_map) { // load input TextFile input(filename); TextFile::ConstIterator input_it = input.begin(); // reset map consensus_map = ConsensusMap(); consensus_map.setUniqueId(); char separator = ' '; if (input_it->hasSubstring("\t")) separator = '\t'; else if (input_it->hasSubstring(" ")) separator = ' '; else if (input_it->hasSubstring(",")) separator = ','; // parsing header line std::vector<String> headers; input_it->split(separator, headers); int offset = 0; for (Size i = 0; i < headers.size(); ++i) { headers[i].trim(); } String header_trimmed = *input.begin(); header_trimmed.trim(); enum { TYPE_UNDEFINED, TYPE_OLD_NOCHARGE, TYPE_OLD_CHARGE, TYPE_CONSENSUS } input_type = TYPE_UNDEFINED; Size input_features = 1; double rt = 0.0; double mz = 0.0; double it = 0.0; Int ch = 0; if (headers.size() <= 2) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "' (line 1)\n"); } else if (headers.size() == 3) input_type = TYPE_OLD_NOCHARGE; else if (headers.size() == 4) input_type = TYPE_OLD_CHARGE; // see if we have a header try { // try to convert... if not: thats a header rt = headers[0].toDouble(); mz = headers[1].toDouble(); it = headers[2].toDouble(); } catch (Exception::BaseException&) { offset = 1; ++input_it; LOG_INFO << "Detected a header line.\n"; } if (headers.size() >= 5) { if (String(headers[4].trim()).toUpper() == "RT1") input_type = TYPE_CONSENSUS; else input_type = TYPE_OLD_CHARGE; } if (input_type == TYPE_CONSENSUS) { // Every consensus style line includes features with four columns. 
// The remainder is meta data input_features = headers.size() / 4; } if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS)) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "' (line 1)\n"); } SignedSize input_size = input.end() - input.begin(); ConsensusMap::FileDescription desc; desc.filename = filename; desc.size = (input_size) - offset; consensus_map.getFileDescriptions()[0] = desc; // parsing features consensus_map.reserve(input_size); for (; input_it != input.end(); ++input_it) { //do nothing for empty lines String line_trimmed = *input_it; line_trimmed.trim(); if (line_trimmed == "") { if ((input_it - input.begin()) < input_size - 1) LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ")."; continue; } //split line to tokens std::vector<String> parts; input_it->split(separator, parts); //abort if line does not contain enough fields if (parts.size() < 3) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": At least three columns are needed! (got " + String(parts.size()) + ")\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n"); } ConsensusFeature cf; cf.setUniqueId(); try { // Convert values. Will return -1 if not available. 
rt = checkedToDouble_(parts, 0); mz = checkedToDouble_(parts, 1); it = checkedToDouble_(parts, 2); ch = checkedToInt_(parts, 3); cf.setRT(rt); cf.setMZ(mz); cf.setIntensity(it); if (input_type != TYPE_OLD_NOCHARGE) cf.setCharge(ch); } catch (Exception::BaseException&) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n"); } // Check all features in one line for (Size j = 1; j < input_features; ++j) { try { Feature f; f.setUniqueId(); // Convert values. Will return -1 if not available. rt = checkedToDouble_(parts, j * 4 + 0); mz = checkedToDouble_(parts, j * 4 + 1); it = checkedToDouble_(parts, j * 4 + 2); ch = checkedToInt_(parts, j * 4 + 3); // Only accept features with at least RT and MZ set if (rt != -1 && mz != -1) { f.setRT(rt); f.setMZ(mz); f.setIntensity(it); f.setCharge(ch); cf.insert(j - 1, f); } } catch (Exception::BaseException&) { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1) + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n"); } } //parse meta data for (Size j = input_features * 4; j < parts.size(); ++j) { String part_trimmed = parts[j]; part_trimmed.trim(); if (part_trimmed != "") { //check if column name is ok if (headers.size() <= j || headers[j] == "") { throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Error: Missing meta data header for column ") + (j + 1) + "!" 
+ String("Offending header line: '") + header_trimmed + "' (line 1)"); } //add meta value cf.setMetaValue(headers[j], part_trimmed); } } //insert feature to map consensus_map.push_back(cf); } // register FileDescriptions ConsensusMap::FileDescription fd; fd.filename = filename; fd.size = consensus_map.size(); Size maps = std::max(input_features - 1, Size(1)); // its either a simple feature or a consensus map // (in this case the 'input_features' includes the centroid, which we do not count) for (Size i = 0; i < maps; ++i) { fd.label = String("EDTA_Map ") + String(i); consensus_map.getFileDescriptions()[i] = fd; } }