/// Runs SuperHirn peak detection on the parsed scan data and converts the
/// resulting SHFeatures into OpenMS Feature objects.
///
/// @param datavec  Parsed scan data handed to the SuperHirn controller.
///                 (Taken by value to match the declaration in the header.)
/// @return One OpenMS Feature per SuperHirn SHFeature (m/z, charge, RT, intensity).
std::vector<Feature> FeatureFinderAlgorithmSHCtrl::extractPeaks(Vec datavec)
{
  // reset this so that the IsotopeDist gets reinitalized
  SuperHirnParameters::instance()->initIsotopeDist_ = false;

  FTPeakDetectController controller;
  controller.startScanParsing(datavec);

  std::vector<Feature> thefeatures;

  // Walk the SuperHirn feature list and translate each entry.
  // Pre-increment avoids a needless iterator copy per step.
  for (std::vector<SHFeature>::iterator p = controller.getLCMS()->get_feature_list_begin();
       p != controller.getLCMS()->get_feature_list_end();
       ++p)
  {
    Feature f;
    f.setMZ(p->get_MZ());
    f.setCharge(p->get_charge_state());
    // SuperHirn keeps RT in minutes; convert back to seconds for OpenMS.
    f.setRT(p->get_retention_time() * 60.0);
    // setIntensity takes a float; narrow explicitly instead of a C-style cast.
    f.setIntensity(static_cast<float>(p->get_peak_area()));

    // ------------------------------------------------------------------------------
    // Convex hull -- needs to be calculated differently according to Markus Mueller
    // ------------------------------------------------------------------------------
    // FeatureLCProfile* profile = (*p).getLCelutionProfile();
    // ConvexHull2D::PointArrayType hull_points(profile->getNbLCelutionSignals());
    //
    // // the key is SCAN
    // unsigned int j = 0;
    // std::map<int, MS1Signal>::iterator lcit;
    // for (lcit = profile->getLCelutionSignalsStart(); lcit != profile->getLCelutionSignalsEnd(); lcit++) {
    //   //int scan = lcit->first;
    //   MS1Signal signal = lcit->second;
    //
    //   hull_points[j][0] = signal.TR * 60.0; // convert back
    //   hull_points[j][1] = signal.mass;
    //   j++;
    // }
    //
    // ConvexHull2D hull;
    // hull.addPoints(hull_points);
    // f.getConvexHulls().push_back(hull);
    // ------------------------------------------------------------------------------

    thefeatures.push_back(f);
  }
  return thefeatures;
}
/// Tool entry point: detects mass traces in a centroided MS1 peak map,
/// optionally refines them by elution-peak detection, and writes the result
/// either as a consensusXML or a featureXML file.
ExitCodes main_(int, const char**) override
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
  if (out_type == FileTypes::UNKNOWN)
  {
    // no explicit out_type given: deduce it from the output file name
    out_type = FileHandler().getTypeByFileName(out);
  }

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MzMLFile mz_data_file;
  mz_data_file.setLogType(log_type_);
  PeakMap ms_peakmap;
  // restrict loading to MS level 1 spectra only
  std::vector<Int> ms_level(1, 1);
  (mz_data_file.getOptions()).setMSLevels(ms_level);
  mz_data_file.load(in, ms_peakmap);

  if (ms_peakmap.size() == 0)
  {
    LOG_WARN << "The given file does not contain any conventional peak data, but might" " contain chromatograms. This tool currently cannot handle them, sorry.";
    return INCOMPATIBLE_INPUT_DATA;
  }

  // make sure that the spectra are sorted by m/z
  ms_peakmap.sortSpectra(true);

  //-------------------------------------------------------------
  // get params for MTD and EPD algorithms
  //-------------------------------------------------------------
  Param com_param = getParam_().copy("algorithm:common:", true);
  writeDebug_("Common parameters passed to both sub-algorithms (mtd and epd)", com_param, 3);

  Param mtd_param = getParam_().copy("algorithm:mtd:", true);
  writeDebug_("Parameters passed to MassTraceDetection", mtd_param, 3);

  Param epd_param = getParam_().copy("algorithm:epd:", true);
  writeDebug_("Parameters passed to ElutionPeakDetection", epd_param, 3);

  //-------------------------------------------------------------
  // configure and run MTD
  //-------------------------------------------------------------
  MassTraceDetection mt_ext;
  // merge the shared 'common' parameters in, then drop the one entry that
  // only the EPD stage understands before applying to MassTraceDetection
  mtd_param.insert("", com_param);
  mtd_param.remove("chrom_fwhm");
  mt_ext.setParameters(mtd_param);
  vector<MassTrace> m_traces;
  mt_ext.run(ms_peakmap, m_traces);

  vector<MassTrace> m_traces_final;
  bool use_epd = epd_param.getValue("enabled").toBool();

  if (!use_epd)
  {
    // EPD disabled: the raw mass traces are the final result (swap avoids a copy)
    swap(m_traces_final, m_traces);
  }
  else
  {
    ElutionPeakDetection ep_det;
    epd_param.remove("enabled"); // artificially added above
    epd_param.insert("", com_param);
    ep_det.setParameters(epd_param);
    std::vector<MassTrace> split_mtraces;
    // note: this step will destroy any meta data annotation (e.g. FWHM_mz_avg)
    ep_det.detectPeaks(m_traces, split_mtraces);

    if (ep_det.getParameters().getValue("width_filtering") == "auto")
    {
      // automatic width filtering: keep only traces passing the peak-width filter
      m_traces_final.clear();
      ep_det.filterByPeakWidth(split_mtraces, m_traces_final);
      LOG_INFO << "Notice: " << split_mtraces.size() - m_traces_final.size() << " of total " << split_mtraces.size() << " were dropped because of too low peak width." << std::endl;
    }
    else
    {
      // no auto filtering: take the split traces as-is
      swap(m_traces_final, split_mtraces);
    }
  }

  //-------------------------------------------------------------
  // writing consensus map output
  //-------------------------------------------------------------
  if (out_type == FileTypes::CONSENSUSXML)
  {
    ConsensusMap consensus_map;
    StringList ms_runs;
    ms_peakmap.getPrimaryMSRunPath(ms_runs);
    consensus_map.setPrimaryMSRunPath(ms_runs);

    for (Size i = 0; i < m_traces_final.size(); ++i)
    {
      // skip empty traces
      if (m_traces_final[i].getSize() == 0) continue;
      ConsensusFeature fcons;
      int k = 0;
      // every raw peak of the trace becomes a sub-feature handle
      for (MassTrace::const_iterator it = m_traces_final[i].begin(); it != m_traces_final[i].end(); ++it)
      {
        FeatureHandle fhandle;
        fhandle.setRT(it->getRT());
        fhandle.setMZ(it->getMZ());
        fhandle.setIntensity(it->getIntensity());
        fhandle.setUniqueId(++k);
        fcons.insert(fhandle);
      }
      // meta value index 3 carries the trace label here — TODO confirm against
      // the MetaInfo registry used elsewhere in the project
      fcons.setMetaValue(3, m_traces_final[i].getLabel());
      fcons.setCharge(0);
      fcons.setWidth(m_traces_final[i].estimateFWHM(use_epd));
      // quality grows with trace size: 1 - 1/N
      fcons.setQuality(1 - (1.0 / m_traces_final[i].getSize()));
      fcons.setRT(m_traces_final[i].getCentroidRT());
      fcons.setMZ(m_traces_final[i].getCentroidMZ());
      fcons.setIntensity(m_traces_final[i].getIntensity(false));
      consensus_map.push_back(fcons);
    }
    consensus_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);
    addDataProcessing_(consensus_map, getProcessingInfo_(DataProcessing::QUANTITATION));
    consensus_map.setUniqueId();
    ConsensusXMLFile().store(out, consensus_map);
  }
  else //(out_type == FileTypes::FEATUREXML)
  {
    //-----------------------------------------------------------
    // convert mass traces to features
    //-----------------------------------------------------------
    std::vector<double> stats_sd;
    FeatureMap ms_feat_map;
    StringList ms_runs;
    ms_peakmap.getPrimaryMSRunPath(ms_runs);
    ms_feat_map.setPrimaryMSRunPath(ms_runs);

    for (Size i = 0; i < m_traces_final.size(); ++i)
    {
      // skip empty traces
      if (m_traces_final[i].getSize() == 0) continue;

      m_traces_final[i].updateMeanMZ();
      m_traces_final[i].updateWeightedMZsd();

      Feature f;
      // meta value index 3 carries the trace label here — TODO confirm against
      // the MetaInfo registry used elsewhere in the project
      f.setMetaValue(3, m_traces_final[i].getLabel());
      f.setCharge(0);
      f.setMZ(m_traces_final[i].getCentroidMZ());
      f.setIntensity(m_traces_final[i].getIntensity(false));
      f.setRT(m_traces_final[i].getCentroidRT());
      f.setWidth(m_traces_final[i].estimateFWHM(use_epd));
      // quality grows with trace size: 1 - 1/N
      f.setOverallQuality(1 - (1.0 / m_traces_final[i].getSize()));
      f.getConvexHulls().push_back(m_traces_final[i].getConvexhull());

      // annotate the m/z standard deviation (absolute and in ppm)
      double sd = m_traces_final[i].getCentroidSD();
      f.setMetaValue("SD", sd);
      f.setMetaValue("SD_ppm", sd / f.getMZ() * 1e6);
      // FWHM_mz_avg is only present when EPD did not run (it destroys this annotation)
      if (m_traces_final[i].fwhm_mz_avg > 0) f.setMetaValue("FWHM_mz_avg", m_traces_final[i].fwhm_mz_avg);
      stats_sd.push_back(m_traces_final[i].getCentroidSD());
      ms_feat_map.push_back(f);
    }

    // print some stats about standard deviation of mass traces
    if (stats_sd.size() > 0)
    {
      std::sort(stats_sd.begin(), stats_sd.end());
      LOG_INFO << "Mass trace m/z s.d.\n" << " low quartile: " << stats_sd[stats_sd.size() * 1 / 4] << "\n" << " median: " << stats_sd[stats_sd.size() * 1 / 2] << "\n" << " upp quartile: " << stats_sd[stats_sd.size() * 3 / 4] << std::endl;
    }

    ms_feat_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // annotate output with data processing info TODO
    addDataProcessing_(ms_feat_map, getProcessingInfo_(DataProcessing::QUANTITATION));
    //ms_feat_map.setUniqueId();

    FeatureXMLFile().store(out, ms_feat_map);
  }

  return EXECUTION_OK;
}
/// Loads an EDTA (text) file into a ConsensusMap.
///
/// Auto-detects the column separator (tab, space or comma) and the file
/// flavor: 3 columns (RT/MZ/intensity), 4 columns (plus charge), or the
/// consensus flavor (groups of 4 columns per sub-feature, detected via an
/// "RT1" header) with optional trailing meta-data columns.
///
/// @param filename       Path of the EDTA file to read.
/// @param consensus_map  Output map; reset and then filled by this call.
/// @throws Exception::ParseError on malformed header or data lines.
void EDTAFile::load(const String& filename, ConsensusMap& consensus_map)
{
  // load input
  TextFile input(filename);
  TextFile::ConstIterator input_it = input.begin();

  // reset map
  consensus_map = ConsensusMap();
  consensus_map.setUniqueId();

  // separator detection: checked in priority order tab > space > comma
  char separator = ' ';
  if (input_it->hasSubstring("\t")) separator = '\t';
  else if (input_it->hasSubstring(" ")) separator = ' ';
  else if (input_it->hasSubstring(",")) separator = ',';

  // parsing header line
  std::vector<String> headers;
  input_it->split(separator, headers);
  int offset = 0; // 1 if a header line is present (data starts one line later)
  for (Size i = 0; i < headers.size(); ++i)
  {
    headers[i].trim();
  }
  // keep the raw (trimmed) first line for error messages
  String header_trimmed = *input.begin();
  header_trimmed.trim();

  enum
  {
    TYPE_UNDEFINED,    // not decided yet
    TYPE_OLD_NOCHARGE, // 3 columns: RT, MZ, intensity
    TYPE_OLD_CHARGE,   // 4 columns: RT, MZ, intensity, charge
    TYPE_CONSENSUS     // groups of 4 columns per sub-feature (+ meta data)
  } input_type = TYPE_UNDEFINED;
  Size input_features = 1; // number of 4-column feature groups per line

  double rt = 0.0;
  double mz = 0.0;
  double it = 0.0;
  Int ch = 0;

  if (headers.size() <= 2)
  {
    throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "' (line 1)\n");
  }
  else if (headers.size() == 3) input_type = TYPE_OLD_NOCHARGE;
  else if (headers.size() == 4) input_type = TYPE_OLD_CHARGE;

  // see if we have a header
  try
  {
    // try to convert... if not: thats a header
    rt = headers[0].toDouble();
    mz = headers[1].toDouble();
    it = headers[2].toDouble();
  }
  catch (Exception::BaseException&)
  {
    offset = 1;
    ++input_it; // skip the header line when iterating data lines below
    LOG_INFO << "Detected a header line.\n";
  }

  if (headers.size() >= 5)
  {
    // a 5th column named "RT1" marks the consensus flavor;
    // anything else is treated as the old 4-column format with extras
    if (String(headers[4].trim()).toUpper() == "RT1") input_type = TYPE_CONSENSUS;
    else input_type = TYPE_OLD_CHARGE;
  }
  if (input_type == TYPE_CONSENSUS)
  {
    // Every consensus style line includes features with four columns.
    // The remainder is meta data
    input_features = headers.size() / 4;
  }

  // formats beyond three columns are ambiguous without a header line
  if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS))
  {
    throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "' (line 1)\n");
  }

  SignedSize input_size = input.end() - input.begin();

  ConsensusMap::FileDescription desc;
  desc.filename = filename;
  desc.size = (input_size) - offset; // number of data lines (header excluded)
  consensus_map.getFileDescriptions()[0] = desc;

  // parsing features
  consensus_map.reserve(input_size);

  for (; input_it != input.end(); ++input_it)
  {
    //do nothing for empty lines
    String line_trimmed = *input_it;
    line_trimmed.trim();
    if (line_trimmed == "")
    {
      // only warn for empty lines that are not the very last line
      if ((input_it - input.begin()) < input_size - 1) LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ").";
      continue;
    }

    //split line to tokens
    std::vector<String> parts;
    input_it->split(separator, parts);

    //abort if line does not contain enough fields
    if (parts.size() < 3)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": At least three columns are needed! (got " + String(parts.size()) + ")\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
    }

    ConsensusFeature cf;
    cf.setUniqueId();

    try
    {
      // Convert values. Will return -1 if not available.
      rt = checkedToDouble_(parts, 0);
      mz = checkedToDouble_(parts, 1);
      it = checkedToDouble_(parts, 2);
      ch = checkedToInt_(parts, 3);

      cf.setRT(rt);
      cf.setMZ(mz);
      cf.setIntensity(it);
      // 3-column files have no charge column; leave charge at its default
      if (input_type != TYPE_OLD_NOCHARGE) cf.setCharge(ch);
    }
    catch (Exception::BaseException&)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
    }

    // Check all features in one line
    // (consensus flavor only: groups 1..input_features-1 are sub-features)
    for (Size j = 1; j < input_features; ++j)
    {
      try
      {
        Feature f;
        f.setUniqueId();

        // Convert values. Will return -1 if not available.
        rt = checkedToDouble_(parts, j * 4 + 0);
        mz = checkedToDouble_(parts, j * 4 + 1);
        it = checkedToDouble_(parts, j * 4 + 2);
        ch = checkedToInt_(parts, j * 4 + 3);

        // Only accept features with at least RT and MZ set
        if (rt != -1 && mz != -1)
        {
          f.setRT(rt);
          f.setMZ(mz);
          f.setIntensity(it);
          f.setCharge(ch);

          // map index j-1: sub-feature groups are 1-based in the file layout
          cf.insert(j - 1, f);
        }
      }
      catch (Exception::BaseException&)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1) + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "' (line " + String((input_it - input.begin()) + 1) + ")\n");
      }
    }

    //parse meta data
    // (any columns after the last 4-column feature group)
    for (Size j = input_features * 4; j < parts.size(); ++j)
    {
      String part_trimmed = parts[j];
      part_trimmed.trim();
      if (part_trimmed != "")
      {
        //check if column name is ok
        if (headers.size() <= j || headers[j] == "")
        {
          throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Error: Missing meta data header for column ") + (j + 1) + "!" + String("Offending header line: '") + header_trimmed + "' (line 1)");
        }
        //add meta value
        cf.setMetaValue(headers[j], part_trimmed);
      }
    }

    //insert feature to map
    consensus_map.push_back(cf);
  }

  // register FileDescriptions
  ConsensusMap::FileDescription fd;
  fd.filename = filename;
  fd.size = consensus_map.size();
  Size maps = std::max(input_features - 1, Size(1)); // its either a simple feature or a consensus map
  // (in this case the 'input_features' includes the centroid, which we do not count)
  for (Size i = 0; i < maps; ++i)
  {
    fd.label = String("EDTA_Map ") + String(i);
    consensus_map.getFileDescriptions()[i] = fd;
  }
}