// TODO: include run information for each peptide
// Adds all MS/MS-derived peptides to the graph -- consensusXML variant
Size ProteinResolver::includeMSMSPeptides_(ConsensusMap & consensus, vector<PeptideEntry> & peptide_nodes)
{
  Size found_peptide = 0;
  for (Size pep = 0; pep != consensus.size(); ++pep)
  {
    ConsensusFeature & feature = consensus.at(pep);
    // get all peptide identifications
    const vector<PeptideIdentification> & pep_id = feature.getPeptideIdentifications();
    for (Size cons_pep = 0; cons_pep < pep_id.size(); ++cons_pep)
    {
      if (pep_id.at(cons_pep).getHits().empty()) continue; // guard against identifications without hits

      String seq = pep_id.at(cons_pep).getHits().front().getSequence().toUnmodifiedString();
      Size peptide_entry = findPeptideEntry_(seq, peptide_nodes);

      if (peptide_entry != peptide_nodes.size())
      {
        if (!peptide_nodes.at(peptide_entry).experimental)
        {
          ++found_peptide;
        }
        // TODO: rename these members -- for consensus input, 'peptide_identification' holds the
        // consensus feature index and 'peptide_hit' holds the PeptideIdentification index;
        // only the top PeptideHit is used at the moment
        peptide_nodes.at(peptide_entry).peptide_identification = pep;
        peptide_nodes.at(peptide_entry).peptide_hit = cons_pep; // only top hit is used at the moment
        peptide_nodes.at(peptide_entry).experimental = true;
        // get intensity and origin of the feature
        peptide_nodes.at(peptide_entry).intensity = feature.getIntensity();
        peptide_nodes.at(peptide_entry).origin = feature.getMetaValue("file_origin");
      }
    }
  }
  return found_peptide;
}
void EDTAFile::store(const String& filename, const ConsensusMap& map) const
{
  TextFile tf;

  // search for maximum number of sub-features (since this determines the number of columns)
  Size max_sub(0);
  for (Size i = 0; i < map.size(); ++i)
  {
    max_sub = std::max(max_sub, map[i].getFeatures().size());
  }

  // write header
  String header("RT\tm/z\tintensity\tcharge");
  for (Size i = 1; i <= max_sub; ++i)
  {
    header += "\tRT" + String(i) + "\tm/z" + String(i) + "\tintensity" + String(i) + "\tcharge" + String(i);
  }
  tf.addLine(header);

  for (Size i = 0; i < map.size(); ++i)
  {
    const ConsensusFeature& f = map[i]; // reference, to avoid copying the consensus feature

    // consensus
    String entry = String(f.getRT()) + "\t" + f.getMZ() + "\t" + f.getIntensity() + "\t" + f.getCharge();

    // sub-features
    const ConsensusFeature::HandleSetType& handle = f.getFeatures();
    for (ConsensusFeature::HandleSetType::const_iterator it = handle.begin(); it != handle.end(); ++it)
    {
      entry += String("\t") + it->getRT() + "\t" + it->getMZ() + "\t" + it->getIntensity() + "\t" + it->getCharge();
    }
    // missing sub-features
    for (Size j = handle.size(); j < max_sub; ++j)
    {
      entry += "\tNA\tNA\tNA\tNA";
    }
    tf.addLine(entry);
  }
  tf.store(filename);
}
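// Minimal usage sketch (added for illustration, not part of the original source) for
// EDTAFile::store() above: load a consensus map and write it out as EDTA. File names are
// placeholders; the usual OpenMS headers and namespace are assumed.
void exampleStoreEdta()
{
  ConsensusMap cm;
  ConsensusXMLFile().load("input.consensusXML", cm); // assumes consensusXML input
  // one row per consensus feature; four columns per sub-feature, padded with 'NA'
  EDTAFile().store("output.edta", cm);
}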
void IsobaricQuantifier::computeLabelingStatistics_(ConsensusMap& consensus_map_out) { // number of total quantified spectra stats_.number_ms2_total = consensus_map_out.size(); // Labeling efficiency statistics for (size_t i = 0; i < consensus_map_out.size(); ++i) { // is whole scan empty?! if (consensus_map_out[i].getIntensity() == 0) ++stats_.number_ms2_empty; // look at single reporters for (ConsensusFeature::HandleSetType::const_iterator it_elements = consensus_map_out[i].begin(); it_elements != consensus_map_out[i].end(); ++it_elements) { if (it_elements->getIntensity() == 0) { String ch_index = consensus_map_out.getFileDescriptions()[it_elements->getMapIndex()].getMetaValue("channel_name"); ++stats_.empty_channels[ch_index]; } } } LOG_INFO << "IsobaricQuantifier: skipped " << stats_.number_ms2_empty << " of " << consensus_map_out.size() << " selected scans due to lack of reporter information:\n"; consensus_map_out.setMetaValue("isoquant:scans_noquant", stats_.number_ms2_empty); consensus_map_out.setMetaValue("isoquant:scans_total", consensus_map_out.size()); LOG_INFO << "IsobaricQuantifier: channels with signal\n"; for (std::map<String, Size>::const_iterator it_m = stats_.empty_channels.begin(); it_m != stats_.empty_channels.end(); ++it_m) { LOG_INFO << " channel " << it_m->first << ": " << (consensus_map_out.size() - it_m->second) << " / " << consensus_map_out.size() << " (" << ((consensus_map_out.size() - it_m->second) * 100 / consensus_map_out.size()) << "%)\n"; consensus_map_out.setMetaValue(String("isoquant:quantifyable_ch") + it_m->first, (consensus_map_out.size() - it_m->second)); } }
void ConsensusMapNormalizerAlgorithmThreshold::normalizeMaps(ConsensusMap& map, const vector<double>& ratios) { ConsensusMap::Iterator cf_it; ProgressLogger progresslogger; progresslogger.setLogType(ProgressLogger::CMD); progresslogger.startProgress(0, map.size(), "normalizing maps"); for (cf_it = map.begin(); cf_it != map.end(); ++cf_it) { progresslogger.setProgress(cf_it - map.begin()); ConsensusFeature::HandleSetType::const_iterator f_it; for (f_it = cf_it->getFeatures().begin(); f_it != cf_it->getFeatures().end(); ++f_it) { f_it->asMutable().setIntensity(f_it->getIntensity() * ratios[f_it->getMapIndex()]); } } progresslogger.endProgress(); }
void IsobaricChannelExtractor::registerChannelsInOutputMap_(ConsensusMap& consensus_map) { // register the individual channels in the output consensus map Int index = 0; for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin(); cl_it != quant_method_->getChannelInformation().end(); ++cl_it) { ConsensusMap::FileDescription channel_as_map; // label is the channel + description provided in the Params channel_as_map.label = quant_method_->getName() + "_" + cl_it->name; // TODO(aiche): number of features need to be set later channel_as_map.size = consensus_map.size(); // add some more MetaInfo channel_as_map.setMetaValue("channel_name", cl_it->name); channel_as_map.setMetaValue("channel_id", cl_it->id); channel_as_map.setMetaValue("channel_description", cl_it->description); channel_as_map.setMetaValue("channel_center", cl_it->center); consensus_map.getFileDescriptions()[index++] = channel_as_map; } }
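// Illustration (added remark, not in the original source) of the FileDescriptions layout
// that registerChannelsInOutputMap_() produces for e.g. iTRAQ 4-plex -- one entry per
// channel, keyed by a running index, with the label built as "<method name>_<channel>":
//
//   index 0: label "itraq4plex_114", meta: channel_name=114, channel_id=0, ...
//   index 1: label "itraq4plex_115", meta: channel_name=115, channel_id=1, ...
//   ...
//
// Note that FileDescription::size is provisionally set to consensus_map.size() (see the
// TODO above); the actual per-channel feature count would have to be set later.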
feat6.setPosition(pos6); feat6.setIntensity(400.0f); feat6.setUniqueId(2); ConsensusFeature cons4(1,feat4); ConsensusFeature cons5(1,feat5); ConsensusFeature cons6(1,feat6); input[1].push_back(cons4); input[1].push_back(cons5); input[1].push_back(cons6); StablePairFinder spf; Param param = spf.getDefaults(); spf.setParameters(param); ConsensusMap result; spf.run(input,result); TEST_EQUAL(result.size(),3); ABORT_IF(result.size()!=3); ConsensusFeature::HandleSetType group1 = result[0].getFeatures(); ConsensusFeature::HandleSetType group2 = result[1].getFeatures(); ConsensusFeature::HandleSetType group3 = result[2].getFeatures(); FeatureHandle ind1(0,feat1); FeatureHandle ind2(0,feat2); FeatureHandle ind3(0,feat3); FeatureHandle ind4(1,feat4); FeatureHandle ind5(1,feat5); FeatureHandle ind6(1,feat6); ConsensusFeature::HandleSetType::const_iterator it; it = group1.begin();
vector<double> ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(const ConsensusMap& map, const double& ratio_threshold, const String& acc_filter, const String& desc_filter) { Size number_of_features = map.size(); Size number_of_maps = map.getFileDescriptions().size(); vector<vector<double> > feature_int(number_of_maps); //get map with most features, resize feature_int UInt map_with_most_features_idx = 0; ConsensusMap::FileDescriptions::const_iterator map_with_most_features = map.getFileDescriptions().find(0); for (UInt i = 0; i < number_of_maps; i++) { feature_int[i].resize(number_of_features); ConsensusMap::FileDescriptions::const_iterator it = map.getFileDescriptions().find(i); if (it->second.size > map_with_most_features->second.size) { map_with_most_features = it; map_with_most_features_idx = i; } } //fill feature_int with intensities Size pass_counter = 0; ConsensusMap::ConstIterator cf_it; UInt idx = 0; for (cf_it = map.begin(); cf_it != map.end(); ++cf_it, ++idx) { if (!ConsensusMapNormalizerAlgorithmMedian::passesFilters_(cf_it, map, acc_filter, desc_filter)) { continue; } ++pass_counter; ConsensusFeature::HandleSetType::const_iterator f_it; for (f_it = cf_it->getFeatures().begin(); f_it != cf_it->getFeatures().end(); ++f_it) { feature_int[f_it->getMapIndex()][idx] = f_it->getIntensity(); } } LOG_INFO << endl << "Using " << pass_counter << "/" << map.size() << " consensus features for computing normalization coefficients" << endl << endl; //determine ratio vector<double> ratio_vector(number_of_maps); for (UInt j = 0; j < number_of_maps; j++) { vector<double> ratios; for (UInt k = 0; k < number_of_features; ++k) { if (feature_int[map_with_most_features_idx][k] != 0.0 && feature_int[j][k] != 0.0) { double ratio = feature_int[map_with_most_features_idx][k] / feature_int[j][k]; if (ratio > ratio_threshold && ratio < 1 / ratio_threshold) { ratios.push_back(ratio); } } } if (ratios.empty()) { LOG_WARN << endl << "Not enough features passing filters. Cannot compute normalization coefficients for all maps. Result will be unnormalized." << endl << endl; return vector<double>(number_of_maps, 1.0); } ratio_vector[j] = Math::mean(ratios.begin(), ratios.end()); } return ratio_vector; }
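// Hedged usage sketch (added, not in the original source) combining the two
// ConsensusMapNormalizerAlgorithmThreshold methods above: compute one intensity ratio per
// map relative to the map with the most features, then rescale all feature handles.
// Assumes both methods are callable as statics (as in the OpenMS API); the threshold value
// and the empty accession/description filters are illustrative.
void exampleNormalizeByThreshold(ConsensusMap& map)
{
  const double ratio_threshold = 0.67; // accept ratios in (0.67, 1/0.67)
  std::vector<double> ratios =
    ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(map, ratio_threshold, "", "");
  // multiplies each handle's intensity by the ratio of its originating map
  ConsensusMapNormalizerAlgorithmThreshold::normalizeMaps(map, ratios);
}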
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  //input file names
  String in = getStringOption_("in");

  //input file type
  FileHandler fh;
  FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

  if (in_type == FileTypes::UNKNOWN)
  {
    in_type = fh.getType(in);
    writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
  }

  if (in_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine input file type!");
    return PARSE_ERROR;
  }

  //output file names and types
  String out = getStringOption_("out");
  FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

  if (out_type == FileTypes::UNKNOWN)
  {
    out_type = fh.getTypeByFileName(out);
  }

  if (out_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine output file type!");
    return PARSE_ERROR;
  }

  bool TIC_DTA2D = getFlag_("TIC_DTA2D");

  writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 1);

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  typedef MSExperiment<Peak1D> MSExperimentType;
  MSExperimentType exp;

  typedef MSExperimentType::SpectrumType SpectrumType;

  typedef FeatureMap<> FeatureMapType;
  FeatureMapType fm;
  ConsensusMap cm;

  writeDebug_(String("Loading input file"), 1);

  if (in_type == FileTypes::CONSENSUSXML)
  {
    ConsensusXMLFile().load(in, cm);
    cm.sortByPosition();
    if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML))
    {
      // You will lose information and waste memory. Enough reasons to issue a warning!
      writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
      exp.set2DData(cm);
    }
  }
  else if (in_type == FileTypes::EDTA)
  {
    EDTAFile().load(in, cm);
    cm.sortByPosition();
    if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML))
    {
      // You will lose information and waste memory. Enough reasons to issue a warning!
      writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
      exp.set2DData(cm);
    }
  }
  else if (in_type == FileTypes::FEATUREXML
          || in_type == FileTypes::TSV
          || in_type == FileTypes::PEPLIST
          || in_type == FileTypes::KROENIK)
  {
    fh.loadFeatures(in, fm, in_type);
    fm.sortByPosition();
    if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML))
    {
      // You will lose information and waste memory. Enough reasons to issue a warning!
      writeLog_("Warning: Converting features to peaks. You will lose information! Mass traces are added, if present as 'num_of_masstraces' and 'masstrace_intensity_<X>' (X>=0) meta values.");
      exp.set2DData<true>(fm);
    }
  }
  else
  {
    fh.loadExperiment(in, exp, in_type, log_type_);
  }

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  writeDebug_(String("Writing output file"), 1);

  if (out_type == FileTypes::MZML)
  {
    //add data processing entry
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::CONVERSION_MZML));
    MzMLFile f;
    f.setLogType(log_type_);
    ChromatogramTools().convertSpectraToChromatograms(exp, true);
    f.store(out, exp);
  }
  else if (out_type == FileTypes::MZDATA)
  {
    //annotate output with data processing info
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::CONVERSION_MZDATA));
    MzDataFile f;
    f.setLogType(log_type_);
    ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
    f.store(out, exp);
  }
  else if (out_type == FileTypes::MZXML)
  {
    //annotate output with data processing info
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::CONVERSION_MZXML));
    MzXMLFile f;
    f.setLogType(log_type_);
    ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
    f.store(out, exp);
  }
  else if (out_type == FileTypes::DTA2D)
  {
    //add data processing entry
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    DTA2DFile f;
    f.setLogType(log_type_);
    ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
    if (TIC_DTA2D)
    {
      // store the total ion chromatogram (TIC)
      f.storeTIC(out, exp);
    }
    else
    {
      // store entire experiment
      f.store(out, exp);
    }
  }
  else if (out_type == FileTypes::MGF)
  {
    //add data processing entry
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    MascotGenericFile f;
    f.setLogType(log_type_);
    f.store(out, exp);
  }
  else if (out_type == FileTypes::FEATUREXML)
  {
    if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV)
       || (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
    {
      fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
    }
    else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
    {
      ConsensusMap::convert(cm, true, fm);
    }
    else // not loaded as feature map or consensus map
    {
      // The feature-specific information is only defaulted. Enough reasons to issue a warning!
      writeLog_("Warning: Converting peaks to features will lead to incomplete features!");
      fm.clear();
      fm.reserve(exp.getSize());
      typedef FeatureMapType::FeatureType FeatureType;
      FeatureType feature;
      feature.setQuality(0, 1); // override default
      feature.setQuality(1, 1); // override default
      feature.setOverallQuality(1); // override default
      for (MSExperimentType::ConstIterator spec_iter = exp.begin(); spec_iter != exp.end(); ++spec_iter)
      {
        feature.setRT(spec_iter->getRT());
        for (SpectrumType::ConstIterator peak1_iter = spec_iter->begin(); peak1_iter != spec_iter->end(); ++peak1_iter)
        {
          feature.setMZ(peak1_iter->getMZ());
          feature.setIntensity(peak1_iter->getIntensity());
          feature.setUniqueId();
          fm.push_back(feature);
        }
      }
      fm.updateRanges();
    }
    addDataProcessing_(fm, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    FeatureXMLFile().store(out, fm);
  }
  else if (out_type == FileTypes::CONSENSUSXML)
  {
    if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV)
       || (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
    {
      fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
      ConsensusMap::convert(0, fm, cm);
    }
    // nothing to do for consensus input
    else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
    {
    }
    else // experimental data
    {
      ConsensusMap::convert(0, exp, cm, exp.size());
    }
    addDataProcessing_(cm, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    ConsensusXMLFile().store(out, cm);
  }
  else if (out_type == FileTypes::EDTA)
  {
    if (fm.size() > 0 && cm.size() > 0)
    {
      LOG_ERROR << "Internal error: cannot decide on container (Consensus or Feature)! This is a bug. Please report it!";
      return INTERNAL_ERROR;
    }
    if (fm.size() > 0) EDTAFile().store(out, fm);
    else if (cm.size() > 0) EDTAFile().store(out, cm);
  }
  else
  {
    writeLog_("Unknown output file type given. Aborting!");
    printUsage_();
    return ILLEGAL_PARAMETERS;
  }
  return EXECUTION_OK;
}
void MapAlignmentEvaluationAlgorithmPrecision::evaluate(const ConsensusMap & consensus_map_in, const ConsensusMap & consensus_map_gt, const double & rt_dev, const double & mz_dev, const Peak2D::IntensityType & int_dev, const bool use_charge, double & out) { //Precision = 1/N * sum ( gt_subtend_tilde_tool_i / tilde_tool_i ) ConsensusMap cons_map_gt; /* = consensus_map_gt; */ for (Size i = 0; i < consensus_map_gt.size(); ++i) { if (consensus_map_gt[i].size() >= 2) { cons_map_gt.push_back(consensus_map_gt[i]); } } ConsensusMap cons_map_tool = consensus_map_in; std::vector<Size> gt_subtend_tilde_tool; //holds the numerators of the sum std::vector<Size> tilde_tool; //holds the denominators of the sum Size gt_subtend_tilde_tool_i = 0; //filling material for the vectors Size tilde_tool_i = 0; Size cons_tool_size = 0; //size of the actual consensus feature of the tool Size gt_i_subtend_tool_j = 0; //size of the intersection of the actual cons. feat. of the tool with the c.f. of GT double precision = 0; //holds the output double sum = 0; //intermediate step: the sum //loop over all consensus features of the ground truth for (Size i = 0; i < cons_map_gt.size(); ++i) //N = cons_map_gt.size() { ConsensusFeature & gt_elem = cons_map_gt[i]; //for every i = 1, ..., N: gt_subtend_tilde_tool_i = 0; tilde_tool_i = 0; //loop over all consensus features of the tool's consensus map for (Size j = 0; j < cons_map_tool.size(); ++j) { ConsensusFeature & tool_elem = cons_map_tool[j]; cons_tool_size = cons_map_tool[j].size(); gt_i_subtend_tool_j = 0; //loop over all features in the ith consensus feature of the gt for (HandleIterator gt_it = gt_elem.begin(); gt_it != gt_elem.end(); ++gt_it) { //loop over all features in the jth consensus feature of the tool's map for (HandleIterator tool_it = tool_elem.begin(); tool_it != tool_elem.end(); ++tool_it) { //++cons_tool_size; if (isSameHandle(*tool_it, *gt_it, rt_dev, mz_dev, int_dev, use_charge)) { ++gt_i_subtend_tool_j; break; } } } if ((cons_tool_size >= 2) && (gt_i_subtend_tool_j > 0)) { gt_subtend_tilde_tool_i += gt_i_subtend_tool_j; tilde_tool_i += cons_tool_size; } } gt_subtend_tilde_tool.push_back(gt_subtend_tilde_tool_i); tilde_tool.push_back(tilde_tool_i); } for (Size k = 0; k < gt_subtend_tilde_tool.size(); ++k) { double fraction = 0; //intermediate step: the fraction if (gt_subtend_tilde_tool[k] != 0) { fraction = double(gt_subtend_tilde_tool[k]) / double(tilde_tool[k]); } sum += fraction; } precision = (1.0 / double(cons_map_gt.size())) * sum; out = precision; }
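// Hedged usage sketch (added, not part of the original source) for evaluate() above:
// score a tool's consensus map against a ground-truth consensus map. The tolerance values
// are illustrative assumptions, and direct instantiation is assumed to be possible (these
// algorithms are usually created via the OpenMS Factory).
void examplePrecision(const ConsensusMap& tool_map, const ConsensusMap& gt_map)
{
  MapAlignmentEvaluationAlgorithmPrecision algo;
  double precision = 0.0;
  // rt_dev = 0.1, mz_dev = 0.01, int_dev = 100 (intensity units), charge must match
  algo.evaluate(tool_map, gt_map, 0.1, 0.01, 100.0f, true, precision);
  LOG_INFO << "precision: " << precision << std::endl;
}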
p.setValue("mz_pair_dists",ListUtils::create<double>(4.0)); p.setValue("mz_dev",0.6); pm.setParameters(p); ConsensusMap output; TEST_EXCEPTION(Exception::IllegalArgument,pm.run(vector<ConsensusMap>(),output)); vector<ConsensusMap> input(1); MapConversion::convert(5,features,input[0]); output.getColumnHeaders()[5].label = "light"; output.getColumnHeaders()[5].filename = "filename"; output.getColumnHeaders()[8] = output.getColumnHeaders()[5]; output.getColumnHeaders()[8].label = "heavy"; pm.run(input,output); TEST_EQUAL(output.size(),1); ABORT_IF(output.size()!=1) TEST_REAL_SIMILAR(output[0].begin()->getMZ(),1.0f); TEST_REAL_SIMILAR(output[0].begin()->getRT(),1.0f); TEST_REAL_SIMILAR(output[0].rbegin()->getMZ(),5.0f); TEST_REAL_SIMILAR(output[0].rbegin()->getRT(),1.5f); TEST_REAL_SIMILAR(output[0].getQuality(),0.959346); TEST_EQUAL(output[0].getCharge(),1); //test automated RT parameter estimation LabeledPairFinder pm2; Param p2; p2.setValue("rt_estimate","true"); p2.setValue("mz_pair_dists", ListUtils::create<double>(4.0)); p2.setValue("mz_dev",0.2); pm2.setParameters(p2);
TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_center"), 115.1082) TEST_EQUAL(cm_out.getFileDescriptions()[2].label, "itraq4plex_116") TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_name"), 116) TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_id"), 2) TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_description"), "else") TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_center"), 116.1116) TEST_EQUAL(cm_out.getFileDescriptions()[3].label, "itraq4plex_117") TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_name"), 117) TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_id"), 3) TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_description"), "") TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_center"), 117.1149) // compare results TEST_EQUAL(cm_out.size(), 5) ABORT_IF(cm_out.size() != 5) ConsensusFeature::iterator cf_it; TEST_EQUAL(cm_out[0].size(), 4) TEST_EQUAL(cm_out[0].getMetaValue("scan_id"), "controllerType=0 controllerNumber=1 scan=2") TEST_REAL_SIMILAR(cm_out[0].getMetaValue("precursor_intensity"), 5251952.5) TEST_REAL_SIMILAR(cm_out[0].getMetaValue("precursor_charge"), 2) TEST_REAL_SIMILAR(cm_out[0].getIntensity(), 1490501.21) cf_it = cm_out[0].begin(); TEST_REAL_SIMILAR(cf_it->getIntensity(), 643005.56) ++cf_it; TEST_REAL_SIMILAR(cf_it->getIntensity(), 458708.97) ++cf_it; TEST_REAL_SIMILAR(cf_it->getIntensity(), 182238.38) ++cf_it;
void EDTAFile::load(const String& filename, ConsensusMap& consensus_map)
{
  // load input
  TextFile input(filename);
  TextFile::ConstIterator input_it = input.begin();

  // reset map
  consensus_map = ConsensusMap();
  consensus_map.setUniqueId();

  char separator = ' ';
  if (input_it->hasSubstring("\t")) separator = '\t';
  else if (input_it->hasSubstring(" ")) separator = ' ';
  else if (input_it->hasSubstring(",")) separator = ',';

  // parsing header line
  std::vector<String> headers;
  input_it->split(separator, headers);
  int offset = 0;
  for (Size i = 0; i < headers.size(); ++i)
  {
    headers[i].trim();
  }
  String header_trimmed = *input.begin();
  header_trimmed.trim();

  enum
  {
    TYPE_UNDEFINED,
    TYPE_OLD_NOCHARGE,
    TYPE_OLD_CHARGE,
    TYPE_CONSENSUS
  } input_type = TYPE_UNDEFINED;
  Size input_features = 1;

  double rt = 0.0;
  double mz = 0.0;
  double it = 0.0;
  Int ch = 0;

  if (headers.size() <= 2)
  {
    throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "' (line 1)\n");
  }
  else if (headers.size() == 3) input_type = TYPE_OLD_NOCHARGE;
  else if (headers.size() == 4) input_type = TYPE_OLD_CHARGE;

  // see if we have a header
  try
  {
    // try to convert... if that fails, it's a header
    rt = headers[0].toDouble();
    mz = headers[1].toDouble();
    it = headers[2].toDouble();
  }
  catch (Exception::BaseException&)
  {
    offset = 1;
    ++input_it;
    LOG_INFO << "Detected a header line.\n";
  }

  if (headers.size() >= 5)
  {
    if (String(headers[4].trim()).toUpper() == "RT1") input_type = TYPE_CONSENSUS;
    else input_type = TYPE_OLD_CHARGE;
  }
  if (input_type == TYPE_CONSENSUS)
  {
    // every consensus-style line stores one (sub-)feature per four columns;
    // the remainder is meta data
    input_features = headers.size() / 4;
  }

  if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS))
  {
    throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "' (line 1)\n");
  }

  SignedSize input_size = input.end() - input.begin();

  ConsensusMap::FileDescription desc;
  desc.filename = filename;
  desc.size = (input_size) - offset;
  consensus_map.getFileDescriptions()[0] = desc;

  // parsing features
  consensus_map.reserve(input_size);

  for (; input_it != input.end(); ++input_it)
  {
    //do nothing for empty lines
    String line_trimmed = *input_it;
    line_trimmed.trim();
    if (line_trimmed == "")
    {
      if ((input_it - input.begin()) < input_size - 1) LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ").";
      continue;
    }

    //split line into tokens
    std::vector<String> parts;
    input_it->split(separator, parts);

    //abort if line does not contain enough fields
    if (parts.size() < 3)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": At least three columns are needed! (got " + String(parts.size()) + ")\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
    }

    ConsensusFeature cf;
    cf.setUniqueId();

    try
    {
      // Convert values. Will return -1 if not available.
      rt = checkedToDouble_(parts, 0);
      mz = checkedToDouble_(parts, 1);
      it = checkedToDouble_(parts, 2);
      ch = checkedToInt_(parts, 3);
      cf.setRT(rt);
      cf.setMZ(mz);
      cf.setIntensity(it);
      if (input_type != TYPE_OLD_NOCHARGE) cf.setCharge(ch);
    }
    catch (Exception::BaseException&)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
    }

    // Check all features in one line
    for (Size j = 1; j < input_features; ++j)
    {
      try
      {
        Feature f;
        f.setUniqueId();
        // Convert values. Will return -1 if not available.
        rt = checkedToDouble_(parts, j * 4 + 0);
        mz = checkedToDouble_(parts, j * 4 + 1);
        it = checkedToDouble_(parts, j * 4 + 2);
        ch = checkedToInt_(parts, j * 4 + 3);
        // Only accept features with at least RT and MZ set
        if (rt != -1 && mz != -1)
        {
          f.setRT(rt);
          f.setMZ(mz);
          f.setIntensity(it);
          f.setCharge(ch);
          cf.insert(j - 1, f);
        }
      }
      catch (Exception::BaseException&)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1) + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
      }
    }

    //parse meta data
    for (Size j = input_features * 4; j < parts.size(); ++j)
    {
      String part_trimmed = parts[j];
      part_trimmed.trim();
      if (part_trimmed != "")
      {
        //check if column name is ok
        if (headers.size() <= j || headers[j] == "")
        {
          throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Error: Missing meta data header for column ") + (j + 1) + "!\n" + String("Offending header line: '") + header_trimmed + "' (line 1)");
        }
        //add meta value
        cf.setMetaValue(headers[j], part_trimmed);
      }
    }

    //insert feature into map
    consensus_map.push_back(cf);
  }

  // register FileDescriptions
  ConsensusMap::FileDescription fd;
  fd.filename = filename;
  fd.size = consensus_map.size();
  Size maps = std::max(input_features - 1, Size(1)); // it's either a simple feature file or a consensus map
                                                     // (in the latter case 'input_features' includes the centroid, which we do not count)
  for (Size i = 0; i < maps; ++i)
  {
    fd.label = String("EDTA_Map ") + String(i);
    consensus_map.getFileDescriptions()[i] = fd;
  }
}
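// Illustration (added for clarity, not in the original source) of the consensus-style EDTA
// layout that load() above expects -- this mirrors what store() writes: centroid columns,
// then four columns per sub-feature, then optional named meta-data columns. 'NA' entries
// are converted to -1 by checkedToDouble_() and the sub-feature is skipped:
//
//   RT     m/z     intensity  charge  RT1   m/z1    intensity1  charge1  my_meta
//   100.0  500.25  1e5        2       99.8  500.24  5e4         2        some_value
//   200.0  600.50  2e5        1       NA    NA      NA          NA       other_value
//
// With 8 feature columns, 'input_features = headers.size() / 4' is 2, i.e. the centroid
// plus one sub-feature block; meta columns start at index input_features * 4.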
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits().size(), 2) TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[0].getSequence(), "C") TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[1].getSequence(), "D") TEST_EQUAL(map[1].getPeptideIdentifications().size(), 1) TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits().size(), 1) TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits()[0].getSequence(), "E") //unassigned peptide identifications TEST_EQUAL(map.getUnassignedPeptideIdentifications().size(), 2) TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits().size(), 1) TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits()[0].getSequence(), "F") TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits().size(), 2) TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[0].getSequence(), "G") TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[1].getSequence(), "H") //features TEST_EQUAL(map.size(), 6) ConsensusFeature cons_feature = map[0]; TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27) TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47) TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07) TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27) TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27) TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47) TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[1], 904.47) TEST_REAL_SIMILAR(cons_feature.getIntensityRange().minPosition()[0], 3.12539e+07) TEST_REAL_SIMILAR(cons_feature.getIntensityRange().maxPosition()[0], 3.12539e+07) TEST_REAL_SIMILAR(cons_feature.getQuality(), 1.1) TEST_EQUAL(cons_feature.getMetaValue("peptide_id") == DataValue("RefSeq:NC_1234"), true) ConsensusFeature::HandleSetType::const_iterator it = cons_feature.begin(); TEST_REAL_SIMILAR(it->getIntensity(), 3.12539e+07)
ExitCodes outputTo(ostream& os)
{
  //-------------------------------------------------------------
  // Parameter handling
  //-------------------------------------------------------------

  // File names
  String in = getStringOption_("in");

  // File type
  FileHandler fh;
  FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

  if (in_type == FileTypes::UNKNOWN)
  {
    in_type = fh.getType(in);
    writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
  }

  if (in_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine input file type!");
    return PARSE_ERROR;
  }

  MSExperiment<Peak1D> exp;
  FeatureMap feat;
  ConsensusMap cons;

  if (in_type == FileTypes::FEATUREXML) //features
  {
    FeatureXMLFile().load(in, feat);
    feat.updateRanges();
  }
  else if (in_type == FileTypes::CONSENSUSXML) //consensus features
  {
    ConsensusXMLFile().load(in, cons);
    cons.updateRanges();
  }

  //-------------------------------------------------------------
  // meta information
  //-------------------------------------------------------------
  if (getFlag_("m"))
  {
    os << endl
       << "-- General information --" << endl
       << endl
       << "file name: " << in << endl
       << "file type: " << FileTypes::typeToName(in_type) << endl;

    //basic info
    os << endl
       << "-- Meta information --" << endl
       << endl;

    if (in_type == FileTypes::FEATUREXML) //features
    {
      os << "Document id : " << feat.getIdentifier() << endl << endl;
    }
    else if (in_type == FileTypes::CONSENSUSXML) //consensus features
    {
      os << "Document id : " << cons.getIdentifier() << endl << endl;
    }
  }

  //-------------------------------------------------------------
  // data processing
  //-------------------------------------------------------------
  if (getFlag_("p"))
  {
    //basic info
    os << endl
       << "-- Data processing information --" << endl
       << endl;

    //get data processing info
    vector<DataProcessing> dp;
    if (in_type == FileTypes::FEATUREXML) //features
    {
      dp = feat.getDataProcessing();
    }
    else if (in_type == FileTypes::CONSENSUSXML) //consensus features
    {
      dp = cons.getDataProcessing();
    }
    int i = 0;
    for (vector<DataProcessing>::iterator it = dp.begin(); it != dp.end(); ++it)
    {
      os << "Data processing " << i << endl;
      os << "\tcompletion_time: " << (*it).getCompletionTime().getDate() << 'T' << (*it).getCompletionTime().getTime() << endl;
      os << "\tsoftware name: " << (*it).getSoftware().getName() << " version " << (*it).getSoftware().getVersion() << endl;
      for (set<DataProcessing::ProcessingAction>::const_iterator paIt = (*it).getProcessingActions().begin(); paIt != (*it).getProcessingActions().end(); ++paIt)
      {
        os << "\t\tprocessing action: " << DataProcessing::NamesOfProcessingAction[*paIt] << endl;
      }
      ++i; // increment per data processing entry (was erroneously placed after the loop)
    }
  }

  //-------------------------------------------------------------
  // statistics
  //-------------------------------------------------------------
  if (getFlag_("s"))
  {
    //-------------------------------------------------------------
    // Content statistics
    //-------------------------------------------------------------
    Map<String, int> meta_names;
    if (in_type == FileTypes::FEATUREXML) //features
    {
      os << "Number of features: " << feat.size() << endl
         << endl
         << "Ranges:" << endl
         << "  retention time: " << String::number(feat.getMin()[Peak2D::RT], 2) << " : " << String::number(feat.getMax()[Peak2D::RT], 2) << endl
         << "  mass-to-charge: " << String::number(feat.getMin()[Peak2D::MZ], 2) << " : " << String::number(feat.getMax()[Peak2D::MZ], 2) << endl
         << "  intensity:      " << String::number(feat.getMinInt(), 2) << " : " << String::number(feat.getMaxInt(), 2) << endl
         << endl;

      // Charge distribution
      Map<UInt,
UInt> charges; for (Size i = 0; i < feat.size(); ++i) { charges[feat[i].getCharge()]++; } os << "Charge distribution" << endl; for (Map<UInt, UInt>::const_iterator it = charges.begin(); it != charges.end(); ++it) { os << "charge " << it->first << ": " << it->second << endl; } } else if (in_type == FileTypes::CONSENSUSXML) //consensus features { map<Size, UInt> num_consfeat_of_size; for (ConsensusMap::const_iterator cmit = cons.begin(); cmit != cons.end(); ++cmit) { ++num_consfeat_of_size[cmit->size()]; } os << endl << "Number of consensus features:" << endl; for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i) { os << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl; } os << " total: " << setw(6) << cons.size() << endl << endl; os << "Ranges:" << endl << " retention time: " << String::number(cons.getMin()[Peak2D::RT], 2) << " : " << String::number(cons.getMax()[Peak2D::RT], 2) << endl << " mass-to-charge: " << String::number(cons.getMin()[Peak2D::MZ], 2) << " : " << String::number(cons.getMax()[Peak2D::MZ], 2) << endl << " intensity: " << String::number(cons.getMinInt(), 2) << " : " << String::number(cons.getMaxInt(), 2) << endl; // file descriptions const ConsensusMap::FileDescriptions& descs = cons.getFileDescriptions(); if (!descs.empty()) { os << endl << "File descriptions:" << endl; for (ConsensusMap::FileDescriptions::const_iterator it = descs.begin(); it != descs.end(); ++it) { os << " - " << it->second.filename << endl << " identifier: " << it->first << endl << " label : " << it->second.label << endl << " size : " << it->second.size << endl; } } } os << endl << "-- Summary Statistics --" << endl << endl; } if (in_type == FileTypes::FEATUREXML) //features { feat.sortByRT(); vector<double> slice_stats; Size n = getIntOption_("n"); Size begin = 0; Size end = 0; os << "#slice\tRT_begin\tRT_end\tnumber_of_features\ttic\t" << "int_mean\tint_stddev\tint_min\tint_max\tint_median\tint_lowerq\tint_upperq\t" << "mz_mean\tmz_stddev\tmz_min\tmz_max\tmz_median\tmz_lowerq\tmz_upperq\t" << "width_mean\twidth_stddev\twidth_min\twidth_max\twidth_median\twidth_lowerq\twidth_upperq\t" << "qual_mean\tqual_stddev\tqual_min\tqual_max\tqual_median\tqual_lowerq\tqual_upperq\t" << "rt_qual_mean\trt_qual_stddev\trt_qual_min\trt_qual_max\trt_qual_median\trt_qual_lowerq\trt_qual_upperq\t" << "mz_qual_mean\tmz_qual_stddev\tmz_qual_min\tmz_qual_max\tmz_qual_median\tmz_qual_lowerq\tmz_qual_upperq" << endl; double rt_begin = 0.0; for (Size slice = 0; slice < n; ++slice) { // Determine slice boundaries. double rt_end = feat.back().getRT() / (double)n * (slice + 1); for (end = begin; end < feat.size() && feat[end].getRT() < rt_end; ++end) {} // Compute statistics on all features in this slice. slice_stats = sliceStatistics(feat, begin, end); // Write the beginning and end of the slices to the output as well as the slice index. 
        os << slice << "\t" << rt_begin << "\t" << rt_end << "\t" << end - begin << "\t";
        // Write the statistics as a line of a CSV file
        copy(slice_stats.begin(), slice_stats.end(), ostream_iterator<double>(os, "\t"));
        os << endl;

        begin = end;
        rt_begin = rt_end;
      }
    }
    else if (in_type == FileTypes::CONSENSUSXML) //consensus features
    {
      Size size = cons.size();

      vector<double> intensities;
      intensities.reserve(size);
      vector<double> qualities; // default-construct and reserve (pre-sizing plus push_back would prepend zeros)
      qualities.reserve(size);
      vector<double> widths;
      widths.reserve(size);

      vector<double> rt_delta_by_elems;
      vector<double> rt_aad_by_elems;
      vector<double> rt_aad_by_cfs;
      rt_aad_by_cfs.reserve(size);

      vector<double> mz_delta_by_elems;
      vector<double> mz_aad_by_elems;
      vector<double> mz_aad_by_cfs;
      mz_aad_by_cfs.reserve(size);

      vector<double> it_delta_by_elems;
      vector<double> it_aad_by_elems;
      vector<double> it_aad_by_cfs;
      it_aad_by_cfs.reserve(size);

      for (ConsensusMap::const_iterator cm_iter = cons.begin(); cm_iter != cons.end(); ++cm_iter)
      {
        double rt_aad = 0;
        double mz_aad = 0;
        double it_aad = 0;
        intensities.push_back(cm_iter->getIntensity());
        qualities.push_back(cm_iter->getQuality());
        widths.push_back(cm_iter->getWidth());
        for (ConsensusFeature::HandleSetType::const_iterator hs_iter = cm_iter->begin(); hs_iter != cm_iter->end(); ++hs_iter)
        {
          double rt_diff = hs_iter->getRT() - cm_iter->getRT();
          rt_delta_by_elems.push_back(rt_diff);
          if (rt_diff < 0)
          {
            rt_diff = -rt_diff;
          }
          rt_aad_by_elems.push_back(rt_diff);
          rt_aad += rt_diff;
          double mz_diff = hs_iter->getMZ() - cm_iter->getMZ();
          mz_delta_by_elems.push_back(mz_diff);
          if (mz_diff < 0)
          {
            mz_diff = -mz_diff;
          }
          mz_aad_by_elems.push_back(mz_diff);
          mz_aad += mz_diff;
          double it_ratio = hs_iter->getIntensity() / (cm_iter->getIntensity() ? cm_iter->getIntensity() : 1.);
          it_delta_by_elems.push_back(it_ratio);
          if (it_ratio < 1.)
          {
            it_ratio = 1. / it_ratio;
          }
          it_aad_by_elems.push_back(it_ratio);
          it_aad += it_ratio;
        }
        if (!cm_iter->empty())
        {
          rt_aad /= cm_iter->size();
          mz_aad /= cm_iter->size();
          it_aad /= cm_iter->size();
        } // otherwise rt_aad etc. are 0 anyway
        rt_aad_by_cfs.push_back(rt_aad);
        mz_aad_by_cfs.push_back(mz_aad);
        it_aad_by_cfs.push_back(it_aad);
      }

      OpenMS::SomeStatistics some_statistics;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensities of consensus features:" << endl << some_statistics(intensities) << endl;

      os.precision(writtenDigits(ConsensusFeature::QualityType()));
      os << "Qualities of consensus features:" << endl << some_statistics(qualities) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Retention time differences ( element-center, weight 1 per element):" << endl << some_statistics(rt_delta_by_elems) << endl;
      os << "Absolute retention time differences ( |element-center|, weight 1 per element):" << endl << some_statistics(rt_aad_by_elems) << endl;
      os << "Average absolute differences of retention time within consensus features ( |element-center|, weight 1 per consensus feature):" << endl << some_statistics(rt_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Mass-to-charge differences ( element-center, weight 1 per element):" << endl << some_statistics(mz_delta_by_elems) << endl;
      os << "Absolute differences of mass-to-charge ( |element-center|, weight 1 per element):" << endl << some_statistics(mz_aad_by_elems) << endl;
      os << "Average absolute differences of mass-to-charge within consensus features ( |element-center|, weight 1 per consensus feature):" << endl << some_statistics(mz_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensity ratios ( element/center, weight 1 per element):" << endl << some_statistics(it_delta_by_elems) << endl;
      os << "Relative intensity error ( max{(element/center),(center/element)}, weight 1 per element):" << endl << some_statistics(it_aad_by_elems) << endl;
      os << "Average relative intensity error within consensus features ( max{(element/center),(center/element)}, weight 1 per consensus feature):" << endl << some_statistics(it_aad_by_cfs) << endl;
    }

    return EXECUTION_OK;
  }
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList in = getStringList_("in");
  String edta = getStringOption_("pos");
  String out = getStringOption_("out");
  String out_sep = getStringOption_("out_separator");
  String out_TIC_debug = getStringOption_("auto_rt:out_debug_TIC");

  StringList in_header = getStringList_("in_header");

  // number of out_debug_TIC files and input files must be identical
  /*if (out_TIC_debug.size() > 0 && in.size() != out_TIC_debug.size())
  {
    LOG_FATAL_ERROR << "Error: number of input file 'in' and auto_rt:out_debug_TIC files must be identical!" << std::endl;
    return ILLEGAL_PARAMETERS;
  }*/

  // number of header files and input files must be identical
  if (in_header.size() > 0 && in.size() != in_header.size())
  {
    LOG_FATAL_ERROR << "Error: number of input file 'in' and 'in_header' files must be identical!" << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  if (!getFlag_("auto_rt:enabled") && !out_TIC_debug.empty())
  {
    LOG_FATAL_ERROR << "Error: TIC output file requested, but auto_rt is not enabled! Either do not request the file or switch on 'auto_rt:enabled'." << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  double rttol = getDoubleOption_("rt_tol");
  double mztol = getDoubleOption_("mz_tol");
  Size rt_collect = getIntOption_("rt_collect");

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  MSExperiment<Peak1D> exp, exp_pp;

  EDTAFile ed;
  ConsensusMap cm;
  ed.load(edta, cm);

  StringList tf_single_header0, tf_single_header1, tf_single_header2; // header content, for each column

  std::vector<String> vec_single; // one line for each compound, multiple columns per experiment
  vec_single.resize(cm.size());
  for (Size fi = 0; fi < in.size(); ++fi)
  {
    // load raw data
    mzml_file.load(in[fi], exp);
    exp.sortSpectra(true);

    if (exp.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry." << std::endl;
      return INCOMPATIBLE_INPUT_DATA;
    }

    // try to detect RT peaks (only for the first input file -- all others should align!)
    // cm.size() might change in here...
    if (getFlag_("auto_rt:enabled") && fi == 0)
    {
      ConsensusMap cm_local = cm; // we might have different RT peaks for each map if 'auto_rt' is enabled
      cm.clear(false); // reset global list (about to be filled)

      // compute TIC
      MSChromatogram<> tic = exp.getTIC();
      MSSpectrum<> tics, tic_gf, tics_pp, tics_sn;
      for (Size ic = 0; ic < tic.size(); ++ic)
      {
        // rewrite Chromatogram to MSSpectrum (GaussFilter requires it)
        Peak1D peak;
        peak.setMZ(tic[ic].getRT());
        peak.setIntensity(tic[ic].getIntensity());
        tics.push_back(peak);
      }
      // smooth (no PP_CWT here due to efficiency reasons -- a large FWHM takes longer!)
      double fwhm = getDoubleOption_("auto_rt:FWHM");
      GaussFilter gf;
      Param p = gf.getParameters();
      p.setValue("gaussian_width", fwhm * 2); // wider than FWHM, just to be sure we have a fully smoothed peak; merging two peaks is unlikely
      p.setValue("use_ppm_tolerance", "false");
      gf.setParameters(p);
      tic_gf = tics;
      gf.filter(tic_gf);

      // pick peaks
      PeakPickerHiRes pp;
      p = pp.getParameters();
      p.setValue("signal_to_noise", getDoubleOption_("auto_rt:SNThreshold"));
      pp.setParameters(p);
      pp.pick(tic_gf, tics_pp);

      if (tics_pp.size())
      {
        LOG_INFO << "Found " << tics_pp.size() << " auto-rt peaks at: ";
        for (Size ipp = 0; ipp != tics_pp.size(); ++ipp) LOG_INFO << " " << tics_pp[ipp].getMZ();
      }
      else
      {
        LOG_INFO << "Found no auto-rt peaks. Change threshold parameters!";
      }
      LOG_INFO << std::endl;

      if (!out_TIC_debug.empty()) // if debug file was given
      {
        // store intermediate steps for debug
        MSExperiment<> out_debug;
        out_debug.addChromatogram(toChromatogram(tics));
        out_debug.addChromatogram(toChromatogram(tic_gf));

        SignalToNoiseEstimatorMedian<MSSpectrum<> > snt;
        snt.init(tics);
        for (Size is = 0; is < tics.size(); ++is)
        {
          Peak1D peak;
          peak.setMZ(tics[is].getMZ()); // use the spectrum copy 'tics' (the chromatogram 'tic' stores RT, not m/z)
          peak.setIntensity(snt.getSignalToNoise(tics[is]));
          tics_sn.push_back(peak);
        }
        out_debug.addChromatogram(toChromatogram(tics_sn));

        out_debug.addChromatogram(toChromatogram(tics_pp));
        // get rid of "native-id" missing warning
        for (Size id = 0; id < out_debug.size(); ++id) out_debug[id].setNativeID(String("spectrum=") + id);

        mzml_file.store(out_TIC_debug, out_debug);
        LOG_DEBUG << "Storing debug AUTO-RT: " << out_TIC_debug << std::endl;
      }

      // add target EICs: for each m/z with no/negative RT, add all combinations of that m/z with auto-RTs
      // duplicate m/z entries will be ignored!
      // all other lines with positive RT values are copied unaffected
      //do not allow doubles
      std::set<double> mz_doubles;
      for (ConsensusMap::Iterator cit = cm_local.begin(); cit != cm_local.end(); ++cit)
      {
        if (cit->getRT() < 0)
        {
          if (mz_doubles.find(cit->getMZ()) == mz_doubles.end())
          {
            mz_doubles.insert(cit->getMZ());
          }
          else
          {
            LOG_INFO << "Found duplicate m/z entry (" << cit->getMZ() << ") for auto-rt. Skipping ..."
<< std::endl; continue; } ConsensusMap cm_RT_multiplex; for (MSSpectrum<>::ConstIterator itp = tics_pp.begin(); itp != tics_pp.end(); ++itp) { ConsensusFeature f = *cit; f.setRT(itp->getMZ()); cm.push_back(f); } } else { // default feature with no auto-rt LOG_INFO << "copying feature with RT " << cit->getRT() << std::endl; cm.push_back(*cit); } } // resize, since we have more positions now vec_single.resize(cm.size()); } // search for each EIC and add up Int not_found(0); Map<Size, double> quant; String description; if (fi < in_header.size()) { HeaderInfo info(in_header[fi]); description = info.header_description; } if (fi == 0) { // two additional columns for first file (theoretical RT and m/z) tf_single_header0 << "" << ""; tf_single_header1 << "" << ""; tf_single_header2 << "RT" << "mz"; } // 5 entries for each input file tf_single_header0 << File::basename(in[fi]) << "" << "" << "" << ""; tf_single_header1 << description << "" << "" << "" << ""; tf_single_header2 << "RTobs" << "dRT" << "mzobs" << "dppm" << "intensity"; for (Size i = 0; i < cm.size(); ++i) { //std::cerr << "Rt" << cm[i].getRT() << " mz: " << cm[i].getMZ() << " R " << cm[i].getMetaValue("rank") << "\n"; double mz_da = mztol * cm[i].getMZ() / 1e6; // mz tolerance in Dalton MSExperiment<>::ConstAreaIterator it = exp.areaBeginConst(cm[i].getRT() - rttol / 2, cm[i].getRT() + rttol / 2, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da); Peak2D max_peak; max_peak.setIntensity(0); max_peak.setRT(cm[i].getRT()); max_peak.setMZ(cm[i].getMZ()); for (; it != exp.areaEndConst(); ++it) { if (max_peak.getIntensity() < it->getIntensity()) { max_peak.setIntensity(it->getIntensity()); max_peak.setRT(it.getRT()); max_peak.setMZ(it->getMZ()); } } double ppm = 0; // observed m/z offset if (max_peak.getIntensity() == 0) { ++not_found; } else { // take median for m/z found std::vector<double> mz; MSExperiment<>::Iterator itm = exp.RTBegin(max_peak.getRT()); SignedSize low = std::min<SignedSize>(std::distance(exp.begin(), itm), rt_collect); SignedSize high = std::min<SignedSize>(std::distance(itm, exp.end()) - 1, rt_collect); MSExperiment<>::AreaIterator itt = exp.areaBegin((itm - low)->getRT() - 0.01, (itm + high)->getRT() + 0.01, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da); for (; itt != exp.areaEnd(); ++itt) { mz.push_back(itt->getMZ()); //std::cerr << "ppm: " << itt.getRT() << " " << itt->getMZ() << " " << itt->getIntensity() << std::endl; } if ((SignedSize)mz.size() > (low + high + 1)) LOG_WARN << "Compound " << i << " has overlapping peaks [" << mz.size() << "/" << low + high + 1 << "]" << std::endl; if (!mz.empty()) { double avg_mz = std::accumulate(mz.begin(), mz.end(), 0.0) / double(mz.size()); //std::cerr << "avg: " << avg_mz << "\n"; ppm = (avg_mz - cm[i].getMZ()) / cm[i].getMZ() * 1e6; } } // appending the second column set requires separator String append_sep = (fi == 0 ? 
"" : out_sep); vec_single[i] += append_sep; // new line if (fi == 0) { vec_single[i] += String(cm[i].getRT()) + out_sep + String(cm[i].getMZ()) + out_sep; } vec_single[i] += String(max_peak.getRT()) + out_sep + String(max_peak.getRT() - cm[i].getRT()) + out_sep + String(max_peak.getMZ()) + out_sep + String(ppm) + out_sep + String(max_peak.getIntensity()); } if (not_found) LOG_INFO << "Missing peaks for " << not_found << " compounds in file '" << in[fi] << "'.\n"; } //------------------------------------------------------------- // create header //------------------------------------------------------------- vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header2, out_sep)); vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header1, out_sep)); vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header0, out_sep)); //------------------------------------------------------------- // writing output //------------------------------------------------------------- TextFile tf; for (std::vector<String>::iterator v_it = vec_single.begin(); v_it != vec_single.end(); ++v_it) { tf.addLine(*v_it); } tf.store(out); return EXECUTION_OK; }
FeatureHandle handle6(1, static_cast<BaseFeature>(maps[1][0])); out.resize(1); out[0].insert(handle5); out[0].insert(handle6); // need an instance of FeatureGroupingAlgorithm: String algo_name = Factory<FeatureGroupingAlgorithm>::registeredProducts()[0]; FeatureGroupingAlgorithm* algo = Factory<FeatureGroupingAlgorithm>::create( algo_name); algo->transferSubelements(maps, out); TEST_EQUAL(out.getFileDescriptions().size(), 4); TEST_EQUAL(out.getFileDescriptions()[0].filename, "file1"); TEST_EQUAL(out.getFileDescriptions()[3].filename, "file4"); TEST_EQUAL(out.size(), 1); TEST_EQUAL(out[0].size(), 4); ConsensusFeature::HandleSetType group = out[0].getFeatures(); ConsensusFeature::HandleSetType::const_iterator it = group.begin(); handle3.setMapIndex(2); handle4.setMapIndex(3); TEST_EQUAL(*it++ == handle1, true); TEST_EQUAL(*it++ == handle2, true); TEST_EQUAL(*it++ == handle3, true); TEST_EQUAL(*it++ == handle4, true); } END_SECTION
void ProteinInference::infer_(ConsensusMap & consensus_map, const size_t protein_identification_index, const UInt reference_map)
{
  ProteinIdentification & protein_ident = consensus_map.getProteinIdentifications()[protein_identification_index];
  for (size_t i = 0; i < protein_ident.getHits().size(); ++i)
  {
    // protein accession
    String accession = protein_ident.getHits()[i].getAccession();
    // consensus feature -> peptide hit
    Map<size_t, PeptideHit> consensus_to_peptide;
    // search for it in consensus elements:
    for (size_t i_cm = 0; i_cm < consensus_map.size(); ++i_cm)
    {
      std::vector<PeptideHit> peptide_hits;
      for (std::vector<PeptideIdentification>::iterator it_pepid = consensus_map[i_cm].getPeptideIdentifications().begin(); it_pepid != consensus_map[i_cm].getPeptideIdentifications().end(); ++it_pepid)
      {
        // are Protein- and PeptideIdentification from the same search engine run?
        if (it_pepid->getIdentifier() != protein_ident.getIdentifier()) continue;

        std::vector<PeptideHit> peptide_hits_local;
        it_pepid->getReferencingHits(accession, peptide_hits_local);

        if (peptide_hits_local.empty()) continue;

        if (sortByUnique_(peptide_hits_local, it_pepid->isHigherScoreBetter())) // we found a unique peptide
        {
          peptide_hits.push_back(peptide_hits_local[0]);
        }
      }

      // if several PeptideIdentifications (== spectra) were assigned to the current ConsensusElement,
      // take the best (as above); e.g. in SILAC this could happen
      // TODO: better idea?
      if (!peptide_hits.empty())
      {
        if (sortByUnique_(peptide_hits, consensus_map[i_cm].getPeptideIdentifications()[0].isHigherScoreBetter())) // found a unique peptide for the current ConsensusElement
        {
          consensus_to_peptide[i_cm] = peptide_hits[0];
#ifdef DEBUG_INFERENCE
          std::cout << "assign peptide " << peptide_hits[0].getSequence() << " to protein " << accession << std::endl;
#endif
        }
      }
    } // ! ConsensusMap loop

    // no peptides found that match the current protein
    if (consensus_to_peptide.empty()) continue;

    // use all matching ConsensusElements to derive a quantitation for the current protein:
    // build up ratios for every map vs. reference
    double coverage = 0;
    Map<Size, std::vector<IntensityType> > ratios;

    // number of unique peptides pointing to the current protein
    UInt coverage_count = (UInt)consensus_to_peptide.size();

    for (Map<size_t, PeptideHit>::iterator it_pephits = consensus_to_peptide.begin(); it_pephits != consensus_to_peptide.end(); ++it_pephits)
    {
      coverage += it_pephits->second.getSequence().size();
      const ConsensusFeature::HandleSetType & handles = consensus_map[it_pephits->first].getFeatures();
      //search if reference is present
      ConsensusFeature::HandleSetType::const_iterator it_ref = handles.end();
      for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin(); it != handles.end(); ++it)
      {
        if (it->getMapIndex() == reference_map)
        {
          it_ref = it;
          break;
        }
      }
      // did not find a reference
      // TODO: assume intensity == 0 instead??
if (it_ref == handles.end()) continue; for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin(); it != handles.end(); ++it) { ratios[it->getMapIndex()].push_back(it->getIntensity() / it_ref->getIntensity()); } } // sort ratios map-wise and take median for (ConsensusMap::FileDescriptions::const_iterator it_file = consensus_map.getFileDescriptions().begin(); it_file != consensus_map.getFileDescriptions().end(); ++it_file) { if (ratios.has(it_file->first)) { //sort intensity ratios for map #it_file->first std::sort(ratios[it_file->first].begin(), ratios[it_file->first].end()); //take median IntensityType protein_ratio = ratios[it_file->first][ratios[it_file->first].size() / 2]; //TODO if ratios have high variance emit a warning! protein_ident.getHits()[i].setMetaValue(String("ratio_") + String(it_file->first), protein_ratio); } } // ! map loop // % coverage of protein by peptides coverage /= DoubleReal(protein_ident.getHits()[i].getSequence().size()) / 100; protein_ident.getHits()[i].setMetaValue("coverage", coverage); protein_ident.getHits()[i].setMetaValue("hits", coverage_count); } // ! Protein loop // protein_to_peptides now contains the Protein -> Peptides mapping // lets estimate the }
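// Note on the median taken above (added remark, not in the original source): for an even
// number of ratios, indexing the sorted vector with [size() / 2] yields the upper of the
// two middle values rather than their mean. A minimal sketch of the symmetric alternative,
// should that bias matter:
//
//   std::sort(v.begin(), v.end());
//   double median = (v.size() % 2 == 1)
//                   ? v[v.size() / 2]
//                   : 0.5 * (v[v.size() / 2 - 1] + v[v.size() / 2]);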
ExitCodes main_(int, const char **) { //load input features FeatureMap input; FeatureXMLFile().load(getStringOption_("in"), input); //load truth consensusXML ConsensusMap truth; ConsensusXMLFile().load(getStringOption_("truth"), truth); //parameters double mz_tol = getDoubleOption_("mz_tol"); double rt_tol = getDoubleOption_("rt_tol"); //seek manual feature in automatic feature map UInt matched_pairs = 0; UInt half_matched_pairs = 0; vector<double> t_ratio, i_ratio, rt_diffs, mz_diffs; for (Size t = 0; t < truth.size(); ++t) { if (truth[t].size() != 2) { cerr << "Error: consensus feature must contain exactly two elements!" << endl; continue; } vector<Feature> best_matches(2); vector<UInt> match_counts(2, 0); vector<Peak2D> elements(2); elements[0] = *(truth[t].getFeatures().begin()); elements[1] = *(++(truth[t].getFeatures().begin())); double mz_tol_charged = mz_tol / truth[t].getCharge(); for (Size e = 0; e < 2; ++e) { double best_score = 0.0; for (Size i = 0; i < input.size(); ++i) { const Feature & f_i = input[i]; if (fabs(f_i.getRT() - elements[e].getRT()) < rt_tol && fabs(f_i.getMZ() - elements[e].getMZ()) < mz_tol_charged) { ++match_counts[e]; double score = (1.0 - fabs(f_i.getMZ() - elements[e].getMZ()) / mz_tol_charged) * (1.0 - fabs(f_i.getRT() - elements[e].getRT()) / rt_tol); if (score > best_score) { best_score = score; best_matches[e] = f_i; } } } } //not matched if (match_counts[0] == 0 && match_counts[1] == 0) { } //half matched else if ((match_counts[0] > 0 && match_counts[1] == 0) || (match_counts[0] == 0 && match_counts[1] > 0)) { ++half_matched_pairs; } //matched else { ++matched_pairs; double a_r = best_matches[0].getIntensity() / best_matches[1].getIntensity(); t_ratio.push_back(a_r); double m_r = elements[0].getIntensity() / elements[1].getIntensity(); i_ratio.push_back(m_r); rt_diffs.push_back(best_matches[1].getRT() - best_matches[0].getRT()); mz_diffs.push_back((best_matches[1].getMZ() - best_matches[0].getMZ()) * truth[t].getCharge()); } } cout << endl; cout << "pair detection statistics:" << endl; cout << "==========================" << endl; cout << "truth pairs: " << truth.size() << endl; cout << "input features: " << input.size() << endl; cout << endl; cout << "found: " << matched_pairs << " (" << String::number(100.0 * matched_pairs / truth.size(), 2) << "%)" << endl; cout << "half found : " << half_matched_pairs << " (" << String::number(100.0 * half_matched_pairs / truth.size(), 2) << "%)" << endl; cout << "not found : " << truth.size() - (matched_pairs + half_matched_pairs) << " (" << String::number(100.0 - 100.0 * (matched_pairs + half_matched_pairs) / truth.size(), 2) << "%)" << endl; cout << endl; cout << "relative pair ratios: " << fiveNumberQuotients(i_ratio, t_ratio, 3) << endl; cout << "pair distance RT : " << fiveNumbers(rt_diffs, 2) << endl; cout << "pair distance m/z: " << fiveNumbers(mz_diffs, 2) << endl; return EXECUTION_OK; }
TEST_EQUAL(feature_maps[0][2].getIntensity(), 200) TEST_EQUAL(feature_maps[0][2].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "CNHAAAAAAAAA") TEST_EQUAL(feature_maps[0][3].getIntensity(), 120) TEST_EQUAL(feature_maps[0][3].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "CNHAADDAAAAA") TEST_EQUAL(feature_maps[0][4].getIntensity(), 250) TEST_EQUAL(feature_maps[0][4].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "HHHHHHHHHHH") TEST_EQUAL(feature_maps[0][5].getIntensity(), 100) TEST_EQUAL(feature_maps[0][5].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "LDCELR") // Test ConsensusMap association ConsensusMap cm = labeler.getConsensus(); TEST_EQUAL(cm.size(), 1) ABORT_IF(cm.size() != 1) TEST_EQUAL(cm[0].getFeatures().size(),2) ConsensusFeature::HandleSetType::const_iterator fhIt = cm[0].getFeatures().begin(); TEST_EQUAL(feature_maps[0][1].getUniqueId(), fhIt->getUniqueId()) ++fhIt; TEST_EQUAL(feature_maps[0][0].getUniqueId(), fhIt->getUniqueId()) // now test the incomplete variant createTestFeatureMapSimVector_(feature_maps); digestFeaturesMapSimVector_(feature_maps); O18Labeler incomplete_labeler; Param p; p.setValue("labeling_efficiency", 0.7);
void IDMapper::annotate(ConsensusMap & map, const std::vector<PeptideIdentification> & ids, const std::vector<ProteinIdentification> & protein_ids, bool measure_from_subelements)
{
  // validate that the "RT" and "MZ" meta values exist
  checkHits_(ids);

  // append protein identifications to the map
  map.getProteinIdentifications().insert(map.getProteinIdentifications().end(), protein_ids.begin(), protein_ids.end());

  // keep track of assigned/unassigned peptide identifications
  std::map<Size, Size> assigned;

  // store which peptides fit which feature (and avoid double entries)
  // consensus feature index -> {peptide indices}
  std::vector<std::set<size_t> > mapping(map.size());

  DoubleList mz_values;
  DoubleReal rt_pep;
  IntList charges;

  // iterate over the peptide IDs
  for (Size i = 0; i < ids.size(); ++i)
  {
    if (ids[i].getHits().empty()) continue;

    getIDDetails_(ids[i], rt_pep, mz_values, charges);

    // iterate over the features
    for (Size cm_index = 0; cm_index < map.size(); ++cm_index)
    {
      // if set to true, we leave the m/z loop, since the whole ID (with all hits) has been added
      bool was_added = false;

      // iterate over the m/z values of the peptide ID
      for (Size i_mz = 0; i_mz < mz_values.size(); ++i_mz)
      {
        DoubleReal mz_pep = mz_values[i_mz];

        // charge states to use for checking:
        IntList current_charges;
        if (!ignore_charge_)
        {
          // if "mz_ref." is "precursor", we have only one m/z value to check,
          // but still one charge state per peptide hit that could match:
          if (mz_values.size() == 1)
          {
            current_charges = charges;
          }
          else
          {
            current_charges.push_back(charges[i_mz]);
          }
          current_charges.push_back(0); // "not specified" always matches
        }

        // check whether we measure the distance from the centroid or from the subelements
        if (!measure_from_subelements)
        {
          if (isMatch_(rt_pep - map[cm_index].getRT(), mz_pep, map[cm_index].getMZ()) && (ignore_charge_ || ListUtils::contains(current_charges, map[cm_index].getCharge())))
          {
            was_added = true;
            map[cm_index].getPeptideIdentifications().push_back(ids[i]);
            ++assigned[i];
          }
        }
        else
        {
          for (ConsensusFeature::HandleSetType::const_iterator it_handle = map[cm_index].getFeatures().begin(); it_handle != map[cm_index].getFeatures().end(); ++it_handle)
          {
            if (isMatch_(rt_pep - it_handle->getRT(), mz_pep, it_handle->getMZ()) && (ignore_charge_ || ListUtils::contains(current_charges, it_handle->getCharge())))
            {
              was_added = true;
              if (mapping[cm_index].count(i) == 0)
              {
                map[cm_index].getPeptideIdentifications().push_back(ids[i]);
                ++assigned[i];
                mapping[cm_index].insert(i);
              }
              break; // this peptide was already added -- no need to check the other handles
            }
          }
        }
        if (was_added) break;
      } // m/z values to check
    } // features
  } // identifications

  Size matches_none(0);
  Size matches_single(0);
  Size matches_multi(0);

  // append unassigned peptide identifications
  for (Size i = 0; i < ids.size(); ++i)
  {
    if (assigned[i] == 0)
    {
      map.getUnassignedPeptideIdentifications().push_back(ids[i]);
      ++matches_none;
    }
    else if (assigned[i] == 1)
    {
      ++matches_single;
    }
    else if (assigned[i] > 1)
    {
      ++matches_multi;
    }
  }

  // some statistics output
  LOG_INFO << "Unassigned peptides: " << matches_none << "\n"
           << "Peptides assigned to exactly one feature: " << matches_single << "\n"
           << "Peptides assigned to multiple features: " << matches_multi << std::endl;
}
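// Aside: a minimal call sketch for annotate(). The file names are
// placeholders and the surrounding setup is illustrative; IdXMLFile::load is
// assumed to supply the protein/peptide identification vectors.
ConsensusMap map;
ConsensusXMLFile().load("features.consensusXML", map);

std::vector<ProteinIdentification> protein_ids;
std::vector<PeptideIdentification> peptide_ids;
IdXMLFile().load("ids.idXML", protein_ids, peptide_ids);

IDMapper mapper; // RT and m/z tolerances come from its Param object
mapper.annotate(map, peptide_ids, protein_ids, true); // measure from subelements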
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  // input file names
  String in = getStringOption_("in");
  bool write_mzML_index = getFlag_("write_mzML_index");

  // input file type
  FileHandler fh;
  FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

  if (in_type == FileTypes::UNKNOWN)
  {
    in_type = fh.getType(in);
    writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
  }

  if (in_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine input file type!");
    return PARSE_ERROR;
  }

  // output file names and types
  String out = getStringOption_("out");
  FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

  if (out_type == FileTypes::UNKNOWN)
  {
    out_type = fh.getTypeByFileName(out);
  }

  if (out_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine output file type!");
    return PARSE_ERROR;
  }

  bool TIC_DTA2D = getFlag_("TIC_DTA2D");
  bool process_lowmemory = getFlag_("process_lowmemory");

  writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 1);

  String uid_postprocessing = getStringOption_("UID_postprocessing");

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  typedef MSExperiment<Peak1D> MSExperimentType;
  MSExperimentType exp;

  typedef MSExperimentType::SpectrumType SpectrumType;

  typedef FeatureMap FeatureMapType;
  FeatureMapType fm;
  ConsensusMap cm;

  writeDebug_(String("Loading input file"), 1);

  if (in_type == FileTypes::CONSENSUSXML)
  {
    ConsensusXMLFile().load(in, cm);
    cm.sortByPosition();
    if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML))
    {
      // You will lose information and waste memory. Enough reasons to issue a warning!
      writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
      exp.set2DData(cm);
    }
  }
  else if (in_type == FileTypes::EDTA)
  {
    EDTAFile().load(in, cm);
    cm.sortByPosition();
    if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML))
    {
      // You will lose information and waste memory. Enough reasons to issue a warning!
      writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
      exp.set2DData(cm);
    }
  }
  else if (in_type == FileTypes::FEATUREXML || in_type == FileTypes::TSV || in_type == FileTypes::PEPLIST || in_type == FileTypes::KROENIK)
  {
    fh.loadFeatures(in, fm, in_type);
    fm.sortByPosition();
    if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML))
    {
      // You will lose information and waste memory. Enough reasons to issue a warning!
      writeLog_("Warning: Converting features to peaks. You will lose information! Mass traces are added, if present as 'num_of_masstraces' and 'masstrace_intensity_<X>' (X>=0) meta values.");
      exp.set2DData<true>(fm);
    }
  }
  else if (process_lowmemory)
  {
    // Special switch for the low-memory options:
    // We can transform the complete experiment directly without first
    // loading the complete data into memory. PlainMSDataWritingConsumer will
    // write the mzML to disk as the spectra are read from the input.
    if (in_type == FileTypes::MZML && out_type == FileTypes::MZML)
    {
      PlainMSDataWritingConsumer consumer(out);
      consumer.getOptions().setWriteIndex(write_mzML_index);
      consumer.addDataProcessing(getProcessingInfo_(DataProcessing::CONVERSION_MZML));
      MzMLFile mzmlfile;
      mzmlfile.setLogType(log_type_);
      mzmlfile.transform(in, &consumer);
      return EXECUTION_OK;
    }
    else if (in_type == FileTypes::MZXML && out_type == FileTypes::MZML)
    {
      PlainMSDataWritingConsumer consumer(out);
      consumer.getOptions().setWriteIndex(write_mzML_index);
      consumer.addDataProcessing(getProcessingInfo_(DataProcessing::CONVERSION_MZML));
      MzXMLFile mzxmlfile;
      mzxmlfile.setLogType(log_type_);
      mzxmlfile.transform(in, &consumer);
      return EXECUTION_OK;
    }
    else
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
        "The process_lowmemory option can only be used with mzML / mzXML input and mzML output data types.");
    }
  }
  else
  {
    fh.loadExperiment(in, exp, in_type, log_type_);
  }

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  writeDebug_(String("Writing output file"), 1);

  if (out_type == FileTypes::MZML)
  {
    // add data processing entry
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::CONVERSION_MZML));
    MzMLFile f;
    f.setLogType(log_type_);
    f.getOptions().setWriteIndex(write_mzML_index);
    ChromatogramTools().convertSpectraToChromatograms(exp, true);
    f.store(out, exp);
  }
  else if (out_type == FileTypes::MZDATA)
  {
    // annotate output with data processing info
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::CONVERSION_MZDATA));
    MzDataFile f;
    f.setLogType(log_type_);
    ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
    f.store(out, exp);
  }
  else if (out_type == FileTypes::MZXML)
  {
    // annotate output with data processing info
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::CONVERSION_MZXML));
    MzXMLFile f;
    f.setLogType(log_type_);
    ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
    f.store(out, exp);
  }
  else if (out_type == FileTypes::DTA2D)
  {
    // add data processing entry
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    DTA2DFile f;
    f.setLogType(log_type_);
    ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
    if (TIC_DTA2D)
    {
      // store the total ion chromatogram (TIC)
      f.storeTIC(out, exp);
    }
    else
    {
      // store the entire experiment
      f.store(out, exp);
    }
  }
  else if (out_type == FileTypes::MGF)
  {
    // add data processing entry
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    MascotGenericFile f;
    f.setLogType(log_type_);
    f.store(out, exp, getFlag_("MGF_compact"));
  }
  else if (out_type == FileTypes::FEATUREXML)
  {
    if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) || (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
    {
      if (uid_postprocessing == "ensure")
      {
        fm.applyMemberFunction(&UniqueIdInterface::ensureUniqueId);
      }
      else if (uid_postprocessing == "reassign")
      {
        fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
      }
    }
    else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
    {
      MapConversion::convert(cm, true, fm);
    }
    else // not loaded as feature map or consensus map
    {
      // The feature-specific information is only defaulted. Enough reasons to issue a warning!
      writeLog_("Warning: Converting peaks to features will lead to incomplete features!");
      fm.clear();
      fm.reserve(exp.getSize());
      typedef FeatureMapType::FeatureType FeatureType;
      FeatureType feature;
      feature.setQuality(0, 1); // override default
      feature.setQuality(1, 1); // override default
      feature.setOverallQuality(1); // override default
      for (MSExperimentType::ConstIterator spec_iter = exp.begin(); spec_iter != exp.end(); ++spec_iter)
      {
        feature.setRT(spec_iter->getRT());
        for (SpectrumType::ConstIterator peak1_iter = spec_iter->begin(); peak1_iter != spec_iter->end(); ++peak1_iter)
        {
          feature.setMZ(peak1_iter->getMZ());
          feature.setIntensity(peak1_iter->getIntensity());
          feature.setUniqueId();
          fm.push_back(feature);
        }
      }
      fm.updateRanges();
    }
    addDataProcessing_(fm, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    FeatureXMLFile().store(out, fm);
  }
  else if (out_type == FileTypes::CONSENSUSXML)
  {
    if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) || (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
    {
      if (uid_postprocessing == "ensure")
      {
        fm.applyMemberFunction(&UniqueIdInterface::ensureUniqueId);
      }
      else if (uid_postprocessing == "reassign")
      {
        fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
      }
      MapConversion::convert(0, fm, cm);
    }
    else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
    {
      // nothing to do for consensus input
    }
    else // experimental data
    {
      MapConversion::convert(0, exp, cm, exp.size());
    }
    addDataProcessing_(cm, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));
    ConsensusXMLFile().store(out, cm);
  }
  else if (out_type == FileTypes::EDTA)
  {
    if (fm.size() > 0 && cm.size() > 0)
    {
      LOG_ERROR << "Internal error: cannot decide on container (Consensus or Feature)! This is a bug. Please report it!";
      return INTERNAL_ERROR;
    }
    if (fm.size() > 0) EDTAFile().store(out, fm);
    else if (cm.size() > 0) EDTAFile().store(out, cm);
  }
  else if (out_type == FileTypes::CSV)
  {
    // ibspectra is currently the only CSV/text-based format, so we assume
    // that out_type == FileTypes::CSV means ibspectra; if more formats are
    // added, we need a more intelligent strategy to decide which conversion
    // is requested

    // IBSpectra selected as output type
    if (in_type != FileTypes::CONSENSUSXML)
    {
      LOG_ERROR << "Incompatible input data: FileConverter can only convert consensusXML files to ibspectra format.";
      return INCOMPATIBLE_INPUT_DATA;
    }
    IBSpectraFile ibfile;
    ibfile.store(out, cm);
  }
  else
  {
    writeLog_("Unknown output file type given. Aborting!");
    printUsage_();
    return ILLEGAL_PARAMETERS;
  }

  return EXECUTION_OK;
}