Example #1
0
 vector<vector<Size> > PScore::calculateRankMap(const PeakMap& peak_map, double mz_window)
 {
   vector<std::vector<Size> > rank_map; // note: ranks are zero based
   rank_map.reserve(peak_map.size());
   for (Size i = 0; i != peak_map.size(); ++i)
   {
     const PeakSpectrum& spec = peak_map[i];
     vector<double> mz;
     vector<double> intensities;
     for (Size j = 0; j != spec.size(); ++j)
     {
       mz.push_back(spec[j].getMZ());
       intensities.push_back(spec[j].getIntensity());
     }
     rank_map.push_back(calculateIntensityRankInMZWindow(mz, intensities, mz_window));
   }
   return rank_map;
 }
  // version for label-free linkers
  void XQuestResultXMLFile::writeXQuestXMLSpec(String out_file, String base_name, const std::vector< std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > >& all_top_csms, const PeakMap& spectra)
  {
    // String spec_xml_filename = base_name + "_matched.spec.xml";
    // XML Header
    std::ofstream spec_xml_file;
    std::cout << "Writing spec.xml to " << out_file << std::endl;
    spec_xml_file.open(out_file.c_str(), std::ios::trunc); // ios::app = append to file, ios::trunc = overwrites file
    // TODO write actual data
    spec_xml_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?><xquest_spectra compare_peaks_version=\"3.4\" date=\"Tue Nov 24 12:41:18 2015\" author=\"Thomas Walzthoeni,Oliver Rinner\" homepage=\"http://proteomics.ethz.ch\" resultdir=\"aleitner_M1012_004_matched\" deffile=\"xquest.def\" >" << std::endl;

    // collect indices of spectra, that need to be written out
    std::vector <Size> spectrum_indices;

    for (Size i = 0; i < all_top_csms.size(); ++i)
    {
      if (!all_top_csms[i].empty())
      {
        if (all_top_csms[i][0].scan_index_light < spectra.size())
        {
          spectrum_indices.push_back(all_top_csms[i][0].scan_index_light);
        }
      }
    }

    // loop over list of indices and write out spectra
    for (Size i = 0; i < spectrum_indices.size(); ++i)
    {
      String spectrum_light_name = base_name + ".light." + spectrum_indices[i];
      String spectrum_heavy_name = base_name + ".heavy." + spectrum_indices[i];

      String spectrum_name = spectrum_light_name + String("_") + spectrum_heavy_name;

      // 4 Spectra resulting from a light/heavy spectra pair.  Write for each spectrum, that is written to xquest.xml (should be all considered pairs, or better only those with at least one sensible Hit, meaning a score was computed)
      spec_xml_file << "<spectrum filename=\"" << spectrum_light_name << ".dta" << "\" type=\"light\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], String(""));
      spec_xml_file << "</spectrum>" << std::endl;

      spec_xml_file << "<spectrum filename=\"" << spectrum_heavy_name << ".dta" << "\" type=\"heavy\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], String(""));
      spec_xml_file << "</spectrum>" << std::endl;

      String spectrum_common_name = spectrum_name + String("_common.txt");
      spec_xml_file << "<spectrum filename=\"" << spectrum_common_name << "\" type=\"common\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
      spec_xml_file << "</spectrum>" << std::endl;

      String spectrum_xlink_name = spectrum_name + String("_xlinker.txt");
      spec_xml_file << "<spectrum filename=\"" << spectrum_xlink_name << "\" type=\"xlinker\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
      spec_xml_file << "</spectrum>" << std::endl;
    }

    spec_xml_file << "</xquest_spectra>" << std::endl;
    spec_xml_file.close();

    return;
  }
 void getPrecursors_(const PeakMap & exp, vector<Precursor> & precursors, vector<double> & precursors_rt)
 {
   for (Size i = 0; i != exp.size(); ++i)
   {
     vector<Precursor> pcs = exp[i].getPrecursors();
     if (pcs.empty())
     {
       continue;
     }
     vector<double> pcs_rt(pcs.size(), exp[i].getRT());
     copy(pcs.begin(), pcs.end(), back_inserter(precursors));
     copy(pcs_rt.begin(), pcs_rt.end(), back_inserter(precursors_rt));
   }
 }
Example #4
0
  void FeatureFinder::run(const String& algorithm_name, PeakMap& input_map, FeatureMap& features, const Param& param, const FeatureMap& seeds)
  {
    // Nothing to do if there is no data
    if ((algorithm_name != "mrm" && input_map.empty()) || (algorithm_name == "mrm" && input_map.getChromatograms().empty()))
    {
      features.clear(true);
      return;
    }

    // check input
    {
      // We need updated ranges => check number of peaks
      if (algorithm_name != "mrm" && input_map.getSize() == 0)
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder needs updated ranges on input map. Aborting.");
      }

      // We need MS1 data only => check levels
      if (algorithm_name != "mrm" && (input_map.getMSLevels().size() != 1 || input_map.getMSLevels()[0] != 1))
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data. Aborting.");
      }

      //Check if the peaks are sorted according to m/z
      if (!input_map.isSorted(true))
      {
        LOG_WARN << "Input map is not sorted by RT and m/z! This is done now, before applying the algorithm!" << std::endl;
        input_map.sortSpectra(true);
        input_map.sortChromatograms(true);
      }
      for (Size s = 0; s < input_map.size(); ++s)
      {
        if (input_map[s].empty())
          continue;
        if (input_map[s][0].getMZ() < 0)
        {
          throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on spectra that contain peaks with positive m/z values. Filter the data accordingly beforehand! Aborting.");
        }
      }
    }

    // initialize
    if (algorithm_name != "mrm" && algorithm_name != "centroided")
    {
      // Resize peak flag vector
      flags_.resize(input_map.size());
      for (Size i = 0; i < input_map.size(); ++i)
      {
        flags_[i].assign(input_map[i].size(), UNUSED);
      }
    }

    // do the work
    if (algorithm_name != "none")
    {
      FeatureFinderAlgorithm* algorithm = Factory<FeatureFinderAlgorithm>::create(algorithm_name);
      algorithm->setParameters(param);
      algorithm->setData(input_map, features, *this);
      algorithm->setSeeds(seeds);
      algorithm->run();
      delete(algorithm);
    }

    if (algorithm_name != "mrm") // mrm  works on chromatograms; the next section is only for conventional data
    {
      //report RT apex spectrum index and native ID for each feature
      for (Size i = 0; i < features.size(); ++i)
      {
        //index
        Size spectrum_index = input_map.RTBegin(features[i].getRT()) - input_map.begin();
        features[i].setMetaValue("spectrum_index", spectrum_index);
        //native id
        if (spectrum_index < input_map.size())
        {
          String native_id = input_map[spectrum_index].getNativeID();
          features[i].setMetaValue("spectrum_native_id", native_id);
        }
        else
        {
          /// @todo that happens sometimes using IsotopeWaveletFeatureFinder (Rene, Marc, Andreas, Clemens)
          std::cerr << "FeatureFinderAlgorithm_impl, line=" << __LINE__ << "; FixMe this cannot be, but happens" << std::endl;
        }
      }
    }
  }
  // version for labeled linkers
  void XQuestResultXMLFile::writeXQuestXMLSpec(String out_file, String base_name, const OPXLDataStructs::PreprocessedPairSpectra& preprocessed_pair_spectra, const std::vector< std::pair<Size, Size> >& spectrum_pairs, const std::vector< std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > >& all_top_csms, const PeakMap& spectra)
  {
    //String spec_xml_filename = base_name + "_matched.spec.xml";
    // XML Header
    std::ofstream spec_xml_file;
    std::cout << "Writing spec.xml to " << out_file << std::endl;
    spec_xml_file.open(out_file.c_str(), std::ios::trunc); // ios::app = append to file, ios::trunc = overwrites file
    // TODO write actual data
    spec_xml_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?><xquest_spectra compare_peaks_version=\"3.4\" date=\"Tue Nov 24 12:41:18 2015\" author=\"Thomas Walzthoeni,Oliver Rinner\" homepage=\"http://proteomics.ethz.ch\" resultdir=\"aleitner_M1012_004_matched\" deffile=\"xquest.def\" >" << std::endl;

    // collect indices of spectra, that need to be written out
    std::vector <std::pair <Size, Size> > spectrum_indices;

    for (Size i = 0; i < all_top_csms.size(); ++i)
    {
      if (!all_top_csms[i].empty())
      {
        if (all_top_csms[i][0].scan_index_light < spectra.size() && all_top_csms[i][0].scan_index_heavy < spectra.size())
        {
          spectrum_indices.push_back( std::make_pair(all_top_csms[i][0].scan_index_light, all_top_csms[i][0].scan_index_heavy) );
        }
      }
    }

    // loop over list of indices and write out spectra
    for (Size i = 0; i < spectrum_indices.size(); ++i)
    {
      Size scan_index_light = spectrum_indices[i].first;
      Size scan_index_heavy = spectrum_indices[i].second;
      // TODO more correct alternative
      String spectrum_light_name = base_name + ".light." + scan_index_light;
      String spectrum_heavy_name = base_name + ".heavy." + scan_index_heavy;
      String spectrum_name = spectrum_light_name + String("_") + spectrum_heavy_name;

      if (scan_index_light < spectra.size() && scan_index_heavy < spectra.size() && i < preprocessed_pair_spectra.spectra_common_peaks.size() && i < preprocessed_pair_spectra.spectra_xlink_peaks.size())
      {
        // 4 Spectra resulting from a light/heavy spectra pair.  Write for each spectrum, that is written to xquest.xml (should be all considered pairs, or better only those with at least one sensible Hit, meaning a score was computed)
        spec_xml_file << "<spectrum filename=\"" << spectrum_light_name << ".dta" << "\" type=\"light\">" << std::endl;
        spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[scan_index_light], String(""));
        spec_xml_file << "</spectrum>" << std::endl;

        spec_xml_file << "<spectrum filename=\"" << spectrum_heavy_name << ".dta" << "\" type=\"heavy\">" << std::endl;
        spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[scan_index_heavy], String(""));
        spec_xml_file << "</spectrum>" << std::endl;

        // the preprocessed pair spectra are sorted by another index
        // because some pairs do not yield any resonable hits, the index from the spectrum matches or spectrum_indices does not address the right pair anymore
        // use find with the pair of spectrum indices to find the correct index for the preprocessed common and cross-linked ion spectra
        std::vector<std::pair <Size, Size> >::const_iterator pair_it = std::find(spectrum_pairs.begin(), spectrum_pairs.end(), spectrum_indices[i]);
        Size pair_index = std::distance(spectrum_pairs.begin(), pair_it);

        String spectrum_common_name = spectrum_name + String("_common.txt");
        spec_xml_file << "<spectrum filename=\"" << spectrum_common_name << "\" type=\"common\">" << std::endl;
        spec_xml_file << getxQuestBase64EncodedSpectrum_(preprocessed_pair_spectra.spectra_common_peaks[pair_index], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
        spec_xml_file << "</spectrum>" << std::endl;

        String spectrum_xlink_name = spectrum_name + String("_xlinker.txt");
        spec_xml_file << "<spectrum filename=\"" << spectrum_xlink_name << "\" type=\"xlinker\">" << std::endl;
        spec_xml_file << getxQuestBase64EncodedSpectrum_(preprocessed_pair_spectra.spectra_xlink_peaks[pair_index], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
        spec_xml_file << "</spectrum>" << std::endl;
      }
    }

    spec_xml_file << "</xquest_spectra>" << std::endl;
    spec_xml_file.close();

    return;
  }
Example #6
0
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in(getStringOption_("in"));
    String out(getStringOption_("out"));
    Size num_spots_per_row(getIntOption_("num_spots_per_row"));
    double RT_distance(getDoubleOption_("RT_distance"));

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    PeakMap exp;
    MzMLFile f;
    f.setLogType(log_type_);
    f.load(in, exp);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    ProgressLogger pl;
    pl.setLogType(log_type_);
    pl.startProgress(0, exp.size(), "Assigning pseudo RTs.");
    Size num_ms1(0), num_ms1_base(0), row_counter(0);
    bool row_to_reverse(false);
    double actual_RT(0);
    for (Size i = 0; i != exp.size(); ++i)
    {
      pl.setProgress(i);
      if (row_to_reverse)
      {
        actual_RT = (double)(num_ms1_base + (num_spots_per_row - row_counter)) * RT_distance;
        writeDebug_("RT=" + String(actual_RT) + " (modified, row_counter=" + String(row_counter) + ")", 1);
      }
      else
      {
        actual_RT = (double)num_ms1 * RT_distance;
        writeDebug_("RT=" + String(actual_RT), 1);
      }

      exp[i].setRT(actual_RT);

      if (exp[i].getMSLevel() == 1)
      {
        if (++row_counter >= num_spots_per_row)
        {
          row_counter = 0;
          if (row_to_reverse)
          {
            row_to_reverse = false;
          }
          else
          {
            row_to_reverse = true;
          }
        }
        ++num_ms1;
        if (!row_to_reverse)
        {
          num_ms1_base = num_ms1;
        }
      }
    }
    pl.endProgress();

    // sort the spectra according to their new RT
    exp.sortSpectra();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------


    f.store(out, exp);

    return EXECUTION_OK;
  }
Example #7
0
  /**
  * @brief Applies the peak-picking algorithm to a map (MSExperiment). This
  * method picks peaks for each scan in the map consecutively. The resulting
  * picked peaks are written to the output map.
  *
  * @param input  input map in profile mode
  * @param output  output map with picked peaks
  * @param boundaries_spec  boundaries of the picked peaks in spectra
  * @param boundaries_chrom  boundaries of the picked peaks in chromatograms
  * @param check_spectrum_type  if set, checks spectrum type and throws an exception if a centroided spectrum is passed 
  */
  void PeakPickerHiRes::pickExperiment(const PeakMap& input, PeakMap& output, 
                                       std::vector<std::vector<PeakBoundary> >& boundaries_spec, 
                                       std::vector<std::vector<PeakBoundary> >& boundaries_chrom,
                                       const bool check_spectrum_type) const
  {
    // make sure that output is clear
    output.clear(true);

    // copy experimental settings
    static_cast<ExperimentalSettings &>(output) = input;

    // resize output with respect to input
    output.resize(input.size());

    Size progress = 0;
    startProgress(0, input.size() + input.getChromatograms().size(), "picking peaks");

    if (input.getNrSpectra() > 0)
    {
      for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
      {
        if (ms_levels_.empty()) // auto mode
        {
          SpectrumSettings::SpectrumType spectrum_type = input[scan_idx].getType();
          if (spectrum_type == SpectrumSettings::CENTROID)
          {
            output[scan_idx] = input[scan_idx];
          }
          else
          {
            std::vector<PeakBoundary> boundaries_s; // peak boundaries of a single spectrum

            pick(input[scan_idx], output[scan_idx], boundaries_s);
            boundaries_spec.push_back(boundaries_s);
          }
        }
        else if (!ListUtils::contains(ms_levels_, input[scan_idx].getMSLevel())) // manual mode
        {
          output[scan_idx] = input[scan_idx];
        }
        else
        {
          std::vector<PeakBoundary> boundaries_s; // peak boundaries of a single spectrum

                                                  // determine type of spectral data (profile or centroided)
          SpectrumSettings::SpectrumType spectrum_type = input[scan_idx].getType();

          if (spectrum_type == SpectrumSettings::CENTROID && check_spectrum_type)
          {
            throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Centroided data provided but profile spectra expected.");
          }

          pick(input[scan_idx], output[scan_idx], boundaries_s);
          boundaries_spec.push_back(boundaries_s);
        }
        setProgress(++progress);
      }
    }


    for (Size i = 0; i < input.getChromatograms().size(); ++i)
    {
      MSChromatogram chromatogram;
      std::vector<PeakBoundary> boundaries_c; // peak boundaries of a single chromatogram
      pick(input.getChromatograms()[i], chromatogram, boundaries_c);
      output.addChromatogram(chromatogram);
      boundaries_chrom.push_back(boundaries_c);
      setProgress(++progress);
    }
    endProgress();

    return;
  }
Example #8
0
  ExitCodes main_(int, const char **) override
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input/output files
    String in(getStringOption_("in"));
    String out(getStringOption_("out"));

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------

    PeakMap exp;
    MzMLFile f;
    f.setLogType(log_type_);

    PeakFileOptions options;
    options.clearMSLevels();
    options.addMSLevel(2);
    f.getOptions() = options;
    f.load(in, exp);

    writeDebug_("Data set contains " + String(exp.size()) + " spectra", 1);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    vector<PeptideIdentification> pep_ids;
    CompNovoIdentificationCID comp_novo_id;

    // set the options
    Param algorithm_param = getParam_().copy("algorithm:", true);
    comp_novo_id.setParameters(algorithm_param);
    comp_novo_id.getIdentifications(pep_ids, exp);
    algorithm_param = comp_novo_id.getParameters();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    DateTime now = DateTime::now();
    String date_string = now.get();
    String identifier("CompNovoCID_" + date_string);

    for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it)
    {
      it->assignRanks();
      it->setIdentifier(identifier);
    }

    vector<ProteinIdentification> prot_ids;
    ProteinIdentification prot_id;
    prot_id.setIdentifier(identifier);
    prot_id.setDateTime(now);
    StringList ms_runs;
    exp.getPrimaryMSRunPath(ms_runs);
    prot_id.setPrimaryMSRunPath(ms_runs);

    ProteinIdentification::SearchParameters search_parameters;
    search_parameters.charges = "+2-+3";
    if (algorithm_param.getValue("tryptic_only").toBool())
    {
      search_parameters.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme("Trypsin"));
    }
    else
    {
      search_parameters.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme("no cleavage"));
    }
    search_parameters.mass_type = ProteinIdentification::MONOISOTOPIC;
    search_parameters.fixed_modifications = algorithm_param.getValue("fixed_modifications");
    search_parameters.variable_modifications = algorithm_param.getValue("variable_modifications");

    search_parameters.missed_cleavages = (UInt)algorithm_param.getValue("missed_cleavages");
    search_parameters.fragment_mass_tolerance = (double)algorithm_param.getValue("fragment_mass_tolerance");
    search_parameters.precursor_mass_tolerance = (double)algorithm_param.getValue("precursor_mass_tolerance");
    search_parameters.fragment_mass_tolerance_ppm = false;
    search_parameters.precursor_mass_tolerance_ppm = false;
    prot_id.setSearchParameters(search_parameters);
    prot_id.setSearchEngineVersion("0.9beta");
    prot_id.setSearchEngine("CompNovo");
    prot_ids.push_back(prot_id);

    IdXMLFile().store(out, prot_ids, pep_ids);

    return EXECUTION_OK;
  }
    // Wrong assignment of the mono-isotopic mass for precursors are assumed:
    // - if precursor_mz matches the mz of a non-monoisotopic feature mass trace
    // - and in the case that believe_charge is true: if feature_charge matches the precursor_charge
    // In the case of wrong mono-isotopic assignment several options for correction are available:
    // keep_original will create a copy of the precursor and tandem spectrum for the new mono-isotopic mass trace and retain the original one
    // all_matching_features does this not for only the closest feature but all features in a question
    set<Size> correctToNearestFeature(const FeatureMap& features, PeakMap & exp, double rt_tolerance_s = 0.0, double mz_tolerance = 0.0, bool ppm = true, bool believe_charge = false, bool keep_original = false, bool all_matching_features = false, int max_trace = 2)
    {
      set<Size> corrected_precursors;
      // for each precursor/MS2 find all features that are in the given tolerance window (bounding box + rt tolerances)
      // if believe_charge is set, only add features that match the precursor charge
      map<Size, set<Size> > scan_idx_to_feature_idx;

      for (Size scan = 0; scan != exp.size(); ++scan)
      {
        // skip non-tandem mass spectra
        if (exp[scan].getMSLevel() != 2 || exp[scan].getPrecursors().empty()) continue;

        // extract precusor / MS2 information
        const double pc_mz = exp[scan].getPrecursors()[0].getMZ();
        const double rt = exp[scan].getRT();
        const int pc_charge = exp[scan].getPrecursors()[0].getCharge();

        for (Size f = 0; f != features.size(); ++f)
        {
          // feature  is incompatible if believe_charge is set and charges don't match
          if (believe_charge && features[f].getCharge() != pc_charge) continue;

          // check if precursor/MS2 position overlap with feature
          if (overlaps_(features[f], rt, pc_mz, rt_tolerance_s))
          {
            scan_idx_to_feature_idx[scan].insert(f);
          }
        }
      }

      // filter sets to retain compatible features:
      // if precursor_mz = feature_mz + n * feature_charge (+/- mz_tolerance) a feature is compatible, others are removed from the set
      for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
      {
        const Size scan = it->first;
        const double pc_mz = exp[scan].getPrecursors()[0].getMZ();
        const double mz_tolerance_da = ppm ? pc_mz * mz_tolerance * 1e-6  : mz_tolerance;

        // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement
        for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); )
        {
          if (!compatible_(features[*sit], pc_mz, mz_tolerance_da, max_trace))
          {
            it->second.erase(sit++);
          }
          else
          {
            ++sit;
          }
        }
      }

      // remove entries with no compatible features (empty sets).
      // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement
      for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); )
      {
        if (it->second.empty())
        {
          scan_idx_to_feature_idx.erase(it++);
        }
        else
        {
          ++it;
        }
      }

      if (debug_level_ > 0)
      {
        LOG_INFO << "Number of precursors with compatible features: " << scan_idx_to_feature_idx.size() << endl;
      }

      if (!all_matching_features)
      {
        // keep only nearest features in set
        for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
        {
          const Size scan = it->first;
          const double pc_rt = exp[scan].getRT();

          double min_distance = 1e16;
          set<Size>::iterator best_feature = it->second.begin();

          // determine nearest/best feature
          for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); ++sit)
          {
            const double current_distance = fabs(pc_rt - features[*sit].getRT());
            if (current_distance < min_distance)
            {
              min_distance = current_distance;
              best_feature = sit;
            }
          }

          // delete all except the nearest/best feature
          // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement
          for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); )
          {
            if (sit != best_feature)
            {
              it->second.erase(sit++);
            }
            else
            {
              ++sit;
            }
          }
        }
      }

      // depending on all_matching_features option, only the nearest or all features are contained in the sets
      // depending on options: move/copy corrected precursor and tandem spectrum
      if (keep_original)
      {
        // duplicate spectra for each feature in set and adapt precursor_mz and precursor_charge to feature_mz and feature_charge
        for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
        {
          const Size scan = it->first;
          MSSpectrum<> spectrum = exp[scan];
          corrected_precursors.insert(scan);
          for (set<Size>::iterator f_it = it->second.begin(); f_it != it->second.end(); ++f_it)
          {
            spectrum.getPrecursors()[0].setMZ(features[*f_it].getMZ());
            spectrum.getPrecursors()[0].setCharge(features[*f_it].getCharge());
            exp.addSpectrum(spectrum);
          }
        }
      }
      else
      {
        // set precursor_mz and _charge to the feature_mz and _charge
        for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
        {
          const Size scan = it->first;
          exp[scan].getPrecursors()[0].setMZ(features[*it->second.begin()].getMZ());
          exp[scan].getPrecursors()[0].setCharge(features[*it->second.begin()].getCharge());
          corrected_precursors.insert(scan);
        }
      }
      return corrected_precursors;
    }
  ExitCodes main_(int argc, const char** argv)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input/output files
    String in(getStringOption_("in")), out(getStringOption_("out"));
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------

    PeakMap exp;
    // keep only MS2 spectra
    fh.getOptions().addMSLevel(2);
    fh.loadExperiment(in, exp, in_type, log_type_);
    writeDebug_(String("Spectra loaded: ") + exp.size(), 2);

    if (exp.getSpectra().empty())
    {
      throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file.");
    }

    // determine type of spectral data (profile or centroided)
    SpectrumSettings::SpectrumType spectrum_type = exp[0].getType();

    if (spectrum_type == SpectrumSettings::RAWDATA)
    {
      if (!getFlag_("force"))
      {
        throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag.");
      }
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    Param mascot_param = getParam_().copy("Mascot_parameters:", true);
    MascotGenericFile mgf_file;
    Param p;
    // TODO: switch this to mzML (much smaller)
    p.setValue("internal:format", "Mascot generic", "Sets the format type of the peak list, this should not be changed unless you write the header only.", ListUtils::create<String>("advanced"));
    p.setValue("internal:HTTP_format", "true", "Write header with MIME boundaries instead of simple key-value pairs. For HTTP submission only.", ListUtils::create<String>("advanced"));
    p.setValue("internal:content", "all", "Use parameter header + the peak lists with BEGIN IONS... or only one of them.", ListUtils::create<String>("advanced"));
    mgf_file.setParameters(mascot_param);

    // get the spectra into string stream
    writeDebug_("Writing MGF file to stream", 1);
    stringstream ss;
    mgf_file.store(ss, in, exp, true); // write in compact format

    // Usage of a QCoreApplication is overkill here (and ugly too), but we just use the
    // QEventLoop to process the signals and slots and grab the results afterwards from
    // the MascotRemotQuery instance
    char** argv2 = const_cast<char**>(argv);
    QCoreApplication event_loop(argc, argv2);
    MascotRemoteQuery* mascot_query = new MascotRemoteQuery(&event_loop);
    Param mascot_query_param = getParam_().copy("Mascot_server:", true);
    writeDebug_("Setting parameters for Mascot query", 1);
    mascot_query->setParameters(mascot_query_param);
    writeDebug_("Setting spectra for Mascot query", 1);
    mascot_query->setQuerySpectra(ss.str());

    // remove unnecessary spectra
    ss.clear();

    QObject::connect(mascot_query, SIGNAL(done()), &event_loop, SLOT(quit()));
    QTimer::singleShot(1000, mascot_query, SLOT(run()));
    writeDebug_("Fire off Mascot query", 1);
    event_loop.exec();
    writeDebug_("Mascot query finished", 1);

    if (mascot_query->hasError())
    {
      writeLog_("An error occurred during the query: " + mascot_query->getErrorMessage());
      delete mascot_query;
      return EXTERNAL_PROGRAM_ERROR;
    }

    // write Mascot response to file
    String mascot_tmp_file_name(File::getTempDirectory() + "/" + File::getUniqueName() + "_Mascot_response");
    QFile mascot_tmp_file(mascot_tmp_file_name.c_str());
    mascot_tmp_file.open(QIODevice::WriteOnly);
    mascot_tmp_file.write(mascot_query->getMascotXMLResponse());
    mascot_tmp_file.close();

    // clean up
    delete mascot_query;

    vector<PeptideIdentification> pep_ids;
    ProteinIdentification prot_id;

    // set up mapping between scan numbers and retention times:
    MascotXMLFile::RTMapping rt_mapping;
    MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);

    // read the response
    MascotXMLFile().load(mascot_tmp_file_name, prot_id, pep_ids, rt_mapping);
    writeDebug_("Read " + String(pep_ids.size()) + " peptide ids and " + String(prot_id.getHits().size()) + " protein identifications from Mascot", 5);

    // for debugging errors relating to unexpected response files
    if (this->debug_level_ >= 100)
    {
      writeDebug_(String("\nMascot Server Response file saved to: '") + mascot_tmp_file_name + "'. If an error occurs, send this file to the OpenMS team.\n", 100);
    }
    else
    {
      // delete file
      mascot_tmp_file.remove();
    }

    // keep or delete protein identifications?!
    vector<ProteinIdentification> prot_ids;
    if (!getFlag_("keep_protein_links"))
    {
      // remove protein links from peptides
      for (Size i = 0; i < pep_ids.size(); ++i)
      {
        std::vector<PeptideHit> hits = pep_ids[i].getHits();
        for (Size h = 0; h < hits.size(); ++h)
        {
          hits[h].setPeptideEvidences(vector<PeptideEvidence>());
        }
        pep_ids[i].setHits(hits);
      }
      // remove proteins
      std::vector<ProteinHit> p_hit;
      prot_id.setHits(p_hit);
    }
    prot_ids.push_back(prot_id);

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    IdXMLFile().store(out, prot_ids, pep_ids);

    return EXECUTION_OK;
  }
  ExitCodes
  main_(int, const char**)
  {
    //-------------------------------------------------------------
    // general variables and data
    //-------------------------------------------------------------
    FileHandler fh;
    vector<PeptideIdentification> peptide_identifications;
    vector<ProteinIdentification> protein_identifications;

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    const String in = getStringOption_("in");

    ProgressLogger logger;
    logger.setLogType(ProgressLogger::CMD);
    logger.startProgress(0, 1, "Loading...");

    if (File::isDirectory(in))
    {
      const String in_directory = File::absolutePath(in).ensureLastChar('/');
      const String mz_file = getStringOption_("mz_file");
      const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide");

      UInt i = 0;
      FileHandler fh;
      FileTypes::Type type;
      MSExperiment<Peak1D> msexperiment;
      // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;)  However, now String::toInt() might throw.
      map<Int, float> num_and_rt;
      vector<String> NativeID;

      // The mz-File (if given)
      if (!mz_file.empty())
      {
        type = fh.getTypeByFileName(mz_file);
        fh.loadExperiment(mz_file, msexperiment, type);

        for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it)
        {
          String(spectra_it->getNativeID()).split('=', NativeID);
          try
          {
            num_and_rt[NativeID[1].toInt()] = spectra_it->getRT();
            // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01
          }
          catch (Exception::ConversionError& e)
          {
            writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
          }
        }
      }

      // Get list of the actual Sequest .out-Files
      StringList in_files;
      if (!File::fileList(in_directory, String("*.out"), in_files))
      {
        writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!");
      }

      // Now get to work ...
      for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it)
      {
        vector<PeptideIdentification> peptide_ids_seq;
        ProteinIdentification protein_id_seq;
        vector<double> pvalues_seq;
        vector<String> in_file_vec;

        SequestOutfile sequest_outfile;

        writeDebug_(String("Reading file ") + *in_files_it, 3);

        try
        {
          sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide);

          in_files_it->split('.', in_file_vec);

          for (Size j = 0; j < peptide_ids_seq.size(); ++j)
          {

            // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files.
            peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i);

            Int scan_number = 0;
            if (!mz_file.empty())
            {
              try
              {
                scan_number = in_file_vec[2].toInt();
                peptide_ids_seq[j].setRT(num_and_rt[scan_number]);
              }
              catch (Exception::ConversionError& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
              }
              catch (exception& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.what());
              }
              //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? semantics of mz
              const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge();
              peptide_ids_seq[j].setMZ(real_mz);
            }

            writeDebug_(String("scan: ") + String(scan_number) + String("  RT: ") + String(peptide_ids_seq[j].getRT()) + "  MZ: " + String(peptide_ids_seq[j].getMZ()) + "  Ident: " + peptide_ids_seq[j].getIdentifier(), 4);

            peptide_identifications.push_back(peptide_ids_seq[j]);
          }

          protein_id_seq.setIdentifier(*in_files_it + "_" + i);
          protein_identifications.push_back(protein_id_seq);
          ++i;
        }
        catch (Exception::ParseError& pe)
        {
          writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")");
          throw;
        }
        catch (...)
        {
          writeLog_(String("Error reading file: ") + *in_files_it);
          throw;
        }
      }

      writeDebug_("All files processed.", 3);
    } // ! directory
    else
    {
      FileTypes::Type in_type = fh.getType(in);

      if (in_type == FileTypes::PEPXML)
      {
        String exp_name = getStringOption_("mz_file");
        String orig_name =  getStringOption_("mz_name");
        bool use_precursor_data = getFlag_("use_precursor_data");

        if (exp_name.empty())
        {
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, orig_name);
        }
        else
        {
          MSExperiment<> exp;
          fh.loadExperiment(exp_name, exp);
          if (!orig_name.empty())
          {
            exp_name = orig_name;
          }
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, exp_name, exp,
                            use_precursor_data);
        }
      }
      else if (in_type == FileTypes::IDXML)
      {
        IdXMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::MZIDENTML)
      {
        LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl;
        MzIdentMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::PROTXML)
      {
        protein_identifications.resize(1);
        peptide_identifications.resize(1);
        ProtXMLFile().load(in, protein_identifications[0],
                           peptide_identifications[0]);
      }
      else if (in_type == FileTypes::OMSSAXML)
      {
        protein_identifications.resize(1);
        OMSSAXMLFile().load(in, protein_identifications[0],
                            peptide_identifications, true);
      }
      else if (in_type == FileTypes::MASCOTXML)
      {
        String scan_regex = getStringOption_("scan_regex");
        String exp_name = getStringOption_("mz_file");
        MascotXMLFile::RTMapping rt_mapping;
        if (!exp_name.empty())
        {
          PeakMap exp;
          // load only MS2 spectra:
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);
        }
        protein_identifications.resize(1);
        MascotXMLFile().load(in, protein_identifications[0],
                             peptide_identifications, rt_mapping, scan_regex);
      }
      else if (in_type == FileTypes::XML)
      {
        ProteinIdentification protein_id;
        XTandemXMLFile().load(in, protein_id, peptide_identifications);
        protein_id.setSearchEngineVersion("");
        protein_id.setSearchEngine("XTandem");
        protein_identifications.push_back(protein_id);
        String exp_name = getStringOption_("mz_file");
        if (!exp_name.empty())
        {
          PeakMap exp;
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it)
          {
            UInt id = (Int)it->getMetaValue("spectrum_id");
            --id; // native IDs were written 1-based
            if (id < exp.size())
            {
              it->setRT(exp[id].getRT());
              double pre_mz(0.0);
              if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
              it->setMZ(pre_mz);
              it->removeMetaValue("spectrum_id");
            }
            else
            {
              LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" << endl;
            }
          }
        }
      }
      else
      {
        writeLog_("Unknown input file type given. Aborting!");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
    }
    logger.endProgress();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    const String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }
    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    logger.startProgress(0, 1, "Storing...");
    if (out_type == FileTypes::PEPXML)
    {
      bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed");
      String mz_file = getStringOption_("mz_file");
      String mz_name = getStringOption_("mz_name");
      PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed);
    }
    else if (out_type == FileTypes::IDXML)
    {
      IdXMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::MZIDENTML)
    {
      MzIdentMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::FASTA)
    {
      Size count = 0;
      ofstream fasta(out.c_str(), ios::out);
      for (Size i = 0; i < peptide_identifications.size(); ++i)
      {
        for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l)
        {
          const PeptideHit& hit = peptide_identifications[i].getHits()[l];
          fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++
                << "|" << hit.getSequence().toString() << endl;
          String seq = hit.getSequence().toUnmodifiedString();
          // FASTA files should have at most 60 characters of sequence info per line
          for (Size j = 0; j < seq.size(); j += 60)
          {
            Size k = min(j + 60, seq.size());
            fasta << string(seq[j], seq[k]) << endl;
          }
        }
      }
    }
    else
    {
      writeLog_("Unsupported output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }
    logger.endProgress();


    return EXECUTION_OK;
  }
Example #12
0
  TOLERANCE_ABSOLUTE(0.01)

  MzDataFile file;
  PeakMap e;

  // real test
  file.load(OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"), e);

  //test DocumentIdentifier addition
  TEST_STRING_EQUAL(e.getLoadedFilePath(), OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"));
  TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()), "mzData");

  //---------------------------------------------------------------------------
  // ms-level, RT, native ID
  //---------------------------------------------------------------------------
  TEST_EQUAL(e.size(), 3)
  TEST_EQUAL(e[0].getMSLevel(), 1)
  TEST_EQUAL(e[1].getMSLevel(), 2)
  TEST_EQUAL(e[2].getMSLevel(), 1)
  TEST_REAL_SIMILAR(e[0].getRT(), 60)
  TEST_REAL_SIMILAR(e[1].getRT(), 120)
  TEST_REAL_SIMILAR(e[2].getRT(), 180)
  TEST_STRING_EQUAL(e[0].getNativeID(), "spectrum=10")
  TEST_STRING_EQUAL(e[1].getNativeID(), "spectrum=11")
  TEST_STRING_EQUAL(e[2].getNativeID(), "spectrum=12")
  TEST_EQUAL(e[0].getType(), SpectrumSettings::UNKNOWN)

  //---------------------------------------------------------------------------
  //meta data array meta data
  //---------------------------------------------------------------------------
  TEST_EQUAL(e[0].getFloatDataArrays()[0].getMetaValue("URL"), "www.open-ms.de")
Example #13
0
  TOLERANCE_ABSOLUTE(0.01)

  PeakMap e;
  DTA2DFile file;

  //test exception
  TEST_EXCEPTION( Exception::FileNotFound , file.load("dummy/dummy.dta2d",e) )

  // real test
  file.load(OPENMS_GET_TEST_DATA_PATH("DTA2DFile_test_1.dta2d"),e);

  //test DocumentIdentifier addition
  TEST_STRING_EQUAL(e.getLoadedFilePath(), OPENMS_GET_TEST_DATA_PATH("DTA2DFile_test_1.dta2d"));
  TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()),"dta2d");

  TEST_EQUAL(e.size(), 9);
  ABORT_IF(e.size() != 9)

  TEST_STRING_EQUAL(e[0].getNativeID(),"index=0")
  TEST_STRING_EQUAL(e[1].getNativeID(),"index=1")
  TEST_STRING_EQUAL(e[2].getNativeID(),"index=2")
  TEST_STRING_EQUAL(e[3].getNativeID(),"index=3")
  TEST_STRING_EQUAL(e[4].getNativeID(),"index=4")
  TEST_STRING_EQUAL(e[5].getNativeID(),"index=5")
  TEST_STRING_EQUAL(e[6].getNativeID(),"index=6")
  TEST_STRING_EQUAL(e[7].getNativeID(),"index=7")
  TEST_STRING_EQUAL(e[8].getNativeID(),"index=8")

  PeakMap::const_iterator it(e.begin());
  TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 230.02)
  TEST_REAL_SIMILAR(it->getRT(), 4711.1)
Example #14
0
	chrom2.setChromatogramType(ChromatogramSettings::SELECTED_REACTION_MONITORING_CHROMATOGRAM);
	ChromatogramPeak peak1, peak2, peak3;
	peak1.setRT(0.1);
	peak2.setRT(0.2);
	peak3.setRT(0.3);
	chrom1.push_back(peak1);
	chrom1.push_back(peak2);

	chrom2.push_back(peak2);
	chrom2.push_back(peak2);

	exp.addChromatogram(chrom1);
	exp.addChromatogram(chrom2);

	TEST_EQUAL(exp.size(), 0)
	TEST_EQUAL(exp.getChromatograms().size(), 2)
	ChromatogramTools().convertChromatogramsToSpectra(exp);
	TEST_EQUAL(exp.size(), 4)
	TEST_EQUAL(exp.getChromatograms().size(), 0)
	TEST_REAL_SIMILAR(exp[0][0].getMZ(), 200.1)

	TEST_EQUAL(exp[0].getPrecursors().size(), 1)
	TEST_REAL_SIMILAR(exp[0].getPrecursors().begin()->getMZ(), 100.1)


}
END_SECTION

START_SECTION(template <typename ExperimentType> void convertSpectraToChromatograms(ExperimentType& exp, bool remove_spectra = false))
{
  ExitCodes main_(int, const char**)
  {
    // instance specific location of settings in INI file (e.g. 'TOPP_Skeleton:1:')
    String ini_location;
    // path to the log file
    String logfile(getStringOption_("log"));
    String xtandem_executable(getStringOption_("xtandem_executable"));
    String inputfile_name;
    String outputfile_name;

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    inputfile_name = getStringOption_("in");
    writeDebug_(String("Input file: ") + inputfile_name, 1);
    if (inputfile_name == "")
    {
      writeLog_("No input file specified. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    outputfile_name = getStringOption_("out");
    writeDebug_(String("Output file: ") + outputfile_name, 1);
    if (outputfile_name == "")
    {
      writeLog_("No output file specified. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    // write input xml file
    String temp_directory = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files
    {
      QDir d;
      d.mkpath(temp_directory.toQString());
    }

    String input_filename(temp_directory + "_tandem_input_file.xml");
    String tandem_input_filename(temp_directory + "_tandem_input_file.mzData");
    String tandem_output_filename(temp_directory + "_tandem_output_file.xml");
    String tandem_taxonomy_filename(temp_directory + "_tandem_taxonomy_file.xml");

    //-------------------------------------------------------------
    // Validate user parameters
    //-------------------------------------------------------------
    if (getIntOption_("min_precursor_charge") > getIntOption_("max_precursor_charge"))
    {
      LOG_ERROR << "Given charge range is invalid: max_precursor_charge needs to be >= min_precursor_charge." << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    String db_name(getStringOption_("database"));
    if (!File::readable(db_name))
    {
      String full_db_name;
      try
      {
        full_db_name = File::findDatabase(db_name);
      }
      catch (...)
      {
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
      db_name = full_db_name;
    }


    PeakMap exp;
    MzMLFile mzml_file;
    mzml_file.getOptions().addMSLevel(2); // only load msLevel 2
    mzml_file.setLogType(log_type_);
    mzml_file.load(inputfile_name, exp);

    if (exp.getSpectra().empty())
    {
      throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file.");
    }

    // determine type of spectral data (profile or centroided)
    SpectrumSettings::SpectrumType spectrum_type = exp[0].getType();

    if (spectrum_type == SpectrumSettings::RAWDATA)
    {
      if (!getFlag_("force"))
      {
        throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag.");
      }
    }

    // we need to replace the native id with a simple numbering schema, to be able to
    // map the IDs back to the spectra (RT, and MZ information)
    Size native_id(0);
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      it->setNativeID(++native_id);
    }

    // We store the file in mzData file format, because MGF files somehow produce in most
    // of the cases IDs with charge 2+. We do not use the input file directly
    // because XTandem sometimes stumbles over misleading substrings in the filename,
    // e.g. mzXML ...
    MzDataFile mzdata_outfile;
    mzdata_outfile.store(tandem_input_filename, exp);

    XTandemInfile infile;
    infile.setInputFilename(tandem_input_filename);
    infile.setOutputFilename(tandem_output_filename);

    ofstream tax_out(tandem_taxonomy_filename.c_str());
    tax_out << "<?xml version=\"1.0\"?>" << "\n";
    tax_out << "\t<bioml label=\"x! taxon-to-file matching list\">" << "\n";
    tax_out << "\t\t<taxon label=\"OpenMS_dummy_taxonomy\">" << "\n";
    tax_out << "\t\t\t<file format=\"peptide\" URL=\"" << db_name << "\" />" << "\n";
    tax_out << "\t</taxon>" << "\n";
    tax_out << "</bioml>" << "\n";
    tax_out.close();

    infile.setTaxonomyFilename(tandem_taxonomy_filename);

    if (getStringOption_("precursor_error_units") == "Da")
    {
      infile.setPrecursorMassErrorUnit(XTandemInfile::DALTONS);
    }
    else
    {
      infile.setPrecursorMassErrorUnit(XTandemInfile::PPM);
    }

    if (getStringOption_("fragment_error_units") == "Da")
    {
      infile.setFragmentMassErrorUnit(XTandemInfile::DALTONS);
    }
    else
    {
      infile.setFragmentMassErrorUnit(XTandemInfile::PPM);
    }

    if (getStringOption_("default_input_file") != "")
    {
      infile.load(getStringOption_("default_input_file"));
      infile.setDefaultParametersFilename(getStringOption_("default_input_file"));
    }
    else
    {
      String default_file = File::find("CHEMISTRY/XTandem_default_input.xml");
      infile.load(default_file);
      infile.setDefaultParametersFilename(default_file);
    }

    infile.setPrecursorMassTolerancePlus(getDoubleOption_("precursor_mass_tolerance"));
    infile.setPrecursorMassToleranceMinus(getDoubleOption_("precursor_mass_tolerance"));
    infile.setFragmentMassTolerance(getDoubleOption_("fragment_mass_tolerance"));
    infile.setMaxPrecursorCharge(getIntOption_("max_precursor_charge"));
    infile.setNumberOfThreads(getIntOption_("threads"));
    infile.setModifications(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications")));
    infile.setTaxon("OpenMS_dummy_taxonomy");
    infile.setOutputResults(getStringOption_("output_results"));
    infile.setMaxValidEValue(getDoubleOption_("max_valid_expect"));
    infile.setCleavageSite(getStringOption_("cleavage_site"));
    infile.setNumberOfMissedCleavages(getIntOption_("missed_cleavages"));
    infile.setRefine(getFlag_("refinement"));
    infile.setSemiCleavage(getFlag_("semi_cleavage"));
    bool allow_isotope_error = getStringOption_("allow_isotope_error") == "yes" ? true : false;
    infile.setAllowIsotopeError(allow_isotope_error);

    infile.write(input_filename);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    int status = QProcess::execute(xtandem_executable.toQString(), QStringList(input_filename.toQString())); // does automatic escaping etc...
    if (status != 0)
    {
      writeLog_("XTandem problem. Aborting! Calling command was: '" + xtandem_executable + " \"" + input_filename + "\"'.\nDoes the !XTandem executable exist?");
      // clean temporary files
      if (this->debug_level_ < 2)
      {
        File::removeDirRecursively(temp_directory);
        LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl;
      }
      else
      {
        LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl;
      }
      return EXTERNAL_PROGRAM_ERROR;
    }

    vector<ProteinIdentification> protein_ids;
    ProteinIdentification protein_id;
    vector<PeptideIdentification> peptide_ids;

    // read the output of X!Tandem and write it to idXML
    XTandemXMLFile tandem_output;
    tandem_output.setModificationDefinitionsSet(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications")));
    // find the file, because XTandem extends the filename with a timestamp we do not know (exactly)
    StringList files;
    File::fileList(temp_directory, "_tandem_output_file*.xml", files);
    if (files.size() != 1)
    {
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, tandem_output_filename);
    }
    tandem_output.load(temp_directory + files[0], protein_id, peptide_ids);

    // now put the RTs into the peptide_ids from the spectrum ids
    for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it != peptide_ids.end(); ++it)
    {
      UInt id = (Int)it->getMetaValue("spectrum_id");
      --id; // native IDs were written 1-based
      if (id < exp.size())
      {
        it->setRT(exp[id].getRT());
        double pre_mz(0.0);
        if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
        it->setMZ(pre_mz);
        //it->removeMetaValue("spectrum_id");
      }
      else
      {
        LOG_ERROR << "XTandemAdapter: Error: id '" << id << "' not found in peak map!" << endl;
      }
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // handle the search parameters
    ProteinIdentification::SearchParameters search_parameters;
    search_parameters.db = getStringOption_("database");
    search_parameters.charges = "+" + String(getIntOption_("min_precursor_charge")) + "-+" + String(getIntOption_("max_precursor_charge"));

    ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC;
    search_parameters.mass_type = mass_type;
    search_parameters.fixed_modifications = getStringList_("fixed_modifications");
    search_parameters.variable_modifications = getStringList_("variable_modifications");
    search_parameters.missed_cleavages = getIntOption_("missed_cleavages");
    search_parameters.peak_mass_tolerance = getDoubleOption_("fragment_mass_tolerance");
    search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance");

    protein_id.setSearchParameters(search_parameters);
    protein_id.setSearchEngineVersion("");
    protein_id.setSearchEngine("XTandem");

    protein_ids.push_back(protein_id);

    IdXMLFile().store(outputfile_name, protein_ids, peptide_ids);

    /// Deletion of temporary files
    if (this->debug_level_ < 2)
    {
      File::removeDirRecursively(temp_directory);
      LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl;
    }
    else
    {
      LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl;
    }

    // some stats
    LOG_INFO << "Statistics:\n"
             << "  identified MS2 spectra: " << peptide_ids.size() << " / " << exp.size() << " = " << int(peptide_ids.size() * 100.0 / exp.size()) << "% (with e-value < " << String(getDoubleOption_("max_valid_expect")) << ")" << std::endl;

    return EXECUTION_OK;
  }
Example #16
0
START_SECTION((SpectraMerger(const SpectraMerger& source)))
	SpectraMerger copy(*e_ptr);
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
END_SECTION

START_SECTION((SpectraMerger& operator=(const SpectraMerger& source)))
	SpectraMerger copy;
	copy = *e_ptr;
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
END_SECTION

START_SECTION((template < typename MapType > void mergeSpectraBlockWise(MapType &exp)))
	PeakMap exp, exp2;
	MzMLFile().load(OPENMS_GET_TEST_DATA_PATH("SpectraMerger_input_2.mzML"), exp);
	TEST_EQUAL(exp.size(), 144)

  exp2 = exp;

	SpectraMerger merger;
  Param p;
  p.setValue("block_method:rt_block_size", 5);
  p.setValue("block_method:ms_levels", IntList::create(StringList::create("1")));
  merger.setParameters(p);
  merger.mergeSpectraBlockWise(exp);
  TEST_EQUAL(exp.size(), 130);
  exp=exp2;

  p.setValue("block_method:rt_block_size", 4);
  p.setValue("block_method:ms_levels", IntList::create(StringList::create("2")));
  merger.setParameters(p);
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    in = getStringOption_("in");
    out = getStringOption_("out");
    String process_option = getStringOption_("processOption");

    Param filter_param = getParam_().copy("algorithm:", true);
    writeDebug_("Parameters passed to filter", filter_param, 3);

    SavitzkyGolayFilter sgolay;
    sgolay.setLogType(log_type_);
    sgolay.setParameters(filter_param);

    if (process_option == "lowmemory")
    {
      return doLowMemAlgorithm(sgolay);
    }

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    MzMLFile mz_data_file;
    mz_data_file.setLogType(log_type_);
    PeakMap exp;
    mz_data_file.load(in, exp);

    if (exp.empty() && exp.getChromatograms().size() == 0)
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.";
      return INCOMPATIBLE_INPUT_DATA;
    }
    //check for peak type (profile data required)
    if (!exp.empty() && PeakTypeEstimator().estimateType(exp[0].begin(), exp[0].end()) == SpectrumSettings::PEAKS)
    {
      writeLog_("Warning: OpenMS peak type estimation indicates that this is not profile data!");
    }

    //check if spectra are sorted
    for (Size i = 0; i < exp.size(); ++i)
    {
      if (!exp[i].isSorted())
      {
        writeLog_("Error: Not all spectra are sorted according to peak m/z positions. Use FileFilter to sort the input!");
        return INCOMPATIBLE_INPUT_DATA;
      }
    }

    //check if chromatograms are sorted
    for (Size i = 0; i < exp.getChromatograms().size(); ++i)
    {
      if (!exp.getChromatogram(i).isSorted())
      {
        writeLog_("Error: Not all chromatograms are sorted according to peak m/z positions. Use FileFilter to sort the input!");
        return INCOMPATIBLE_INPUT_DATA;
      }
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    sgolay.filterExperiment(exp);

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    //annotate output with data processing info
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::SMOOTHING));

    mz_data_file.store(out, exp);

    return EXECUTION_OK;
  }
Example #18
0
    ExitCodes main_(int , const char**) override
    {

      // path to the log file
      String logfile(getStringOption_("log"));
      String pepnovo_executable(getStringOption_("pepnovo_executable"));

      PeakMap exp;

      String inputfile_name = getStringOption_("in");
      writeDebug_(String("Input file: ") + inputfile_name, 1);

      String outputfile_name = getStringOption_("out");
      writeDebug_(String("Output file: ") + outputfile_name, 1);

      String model_directory = getStringOption_("model_directory");
      writeDebug_(String("model directory: ") + model_directory, 1);

      String model_name = getStringOption_("model");
      writeDebug_(String("model directory: ") + model_name, 1);

      double fragment_tolerance = getDoubleOption_("fragment_tolerance");
      if (fragment_tolerance!=-1.0 && (fragment_tolerance<0 || fragment_tolerance>0.75))
      {
        writeLog_("Invalid fragment tolerance");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }

      double pm_tolerance = getDoubleOption_("pm_tolerance");
      if (pm_tolerance!=-1.0 && (pm_tolerance<0.0 || pm_tolerance>5.0))
      {
        writeLog_("Invalid fragment tolerance");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }

      Int tag_length = getIntOption_("tag_length");
      if ( tag_length!=-1 && (tag_length<3 || tag_length>6))
      {
        writeLog_("Invalid fragment tolerance");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
      String digest = getStringOption_("digest");
      Size num_solutions=getIntOption_("num_solutions");

      //-------------------------------------------------------------
      // reading input
      //-------------------------------------------------------------

      // only load msLevel 2
      MzMLFile mzml_infile;
      mzml_infile.getOptions().addMSLevel(2);
      mzml_infile.setLogType(log_type_);
      mzml_infile.load(inputfile_name, exp);

      // we map the native id to the MZ and RT to be able to
      // map the IDs back to the spectra (RT, and MZ Meta Information)
      PepNovoOutfile::IndexPosMappingType index_to_precursor;
      for (Size i = 0; i < exp.size(); ++i)
      {
        index_to_precursor[i]= make_pair(exp[i].getRT(), exp[i].getPrecursors()[0].getPosition()[0]); //set entry <RT, MZ>
      }

      logfile = getStringOption_("log");
      
      QDir qdir_models_source(model_directory.c_str());
      if (!qdir_models_source.exists())
      {
        writeLog_("The model directory does not exist");
        return INPUT_FILE_NOT_FOUND;
      }
      
      // create temp directory
      QDir qdir_temp(File::getTempDirectory().toQString());
      String temp_data_directory = File::getUniqueName();
      qdir_temp.mkdir(temp_data_directory.toQString());
      qdir_temp.cd(temp_data_directory.toQString());
      temp_data_directory  = File::getTempDirectory() + "/" + temp_data_directory; // delete later

      String mgf_file = temp_data_directory + "/" + File::getUniqueName() + ".mgf"; // the mzXML parser of PepNovo is somewhat broken.. don't use mzXML
      MascotGenericFile().store(mgf_file, exp);

      bool error(false);

      try
      {
        //temporary File to store PepNovo output
        String temp_pepnovo_outfile = qdir_temp.absoluteFilePath("tmp_pepnovo_out.txt");
        String tmp_models_dir = qdir_temp.absoluteFilePath("Models");

        std::map<String, String>mods_and_keys; //, key_to_id;

        if (qdir_temp.cd("Models"))
        {
          writeLog_("The temporary directory already contains \"Model\" Folder. Please delete it and re-run. Aborting!");
          return CANNOT_WRITE_OUTPUT_FILE;
        }
        else
        {
          qdir_temp.mkdir("Models");
          qdir_temp.cd("Models");
        }

        //copy the Models folder of OpenMS into the temp_data_directory
        QStringList pepnovo_files = qdir_models_source.entryList(QDir::Dirs | QDir::Files|QDir::NoDotAndDotDot);
        if (pepnovo_files.empty())
        {
          writeLog_("The \"Model\" directory does not contain model files. Aborting!");
          return INPUT_FILE_NOT_FOUND;
        }

        for (QStringList::ConstIterator file_it=pepnovo_files.begin(); file_it!=pepnovo_files.end(); ++file_it)
        {
          if (qdir_models_source.cd(*file_it))
          {
            qdir_temp.mkdir(*file_it);
            qdir_temp.cd(*file_it);
            QStringList subdir_files = qdir_models_source.entryList(QDir::Dirs | QDir::Files|QDir::NoDotAndDotDot);
            for (QStringList::ConstIterator subdir_file_it=subdir_files.begin(); subdir_file_it!=subdir_files.end(); ++subdir_file_it)
            {
              QFile::copy(qdir_models_source.filePath(*subdir_file_it), qdir_temp.filePath(*subdir_file_it));
            }
            qdir_temp.cdUp();
            qdir_models_source.cdUp();
          }
          else
          {
            QFile::copy(qdir_models_source.filePath(*file_it), qdir_temp.filePath(*file_it));
          }
        }

        //generate PTM File and store in temp directory
        PepNovoInfile p_novo_infile;
        String ptm_command;
        if (!getStringList_("fixed_modifications").empty() || !getStringList_("variable_modifications").empty())
        {
          p_novo_infile.setModifications(getStringList_("fixed_modifications"), getStringList_("variable_modifications"));
          p_novo_infile.store(qdir_temp.filePath("PepNovo_PTMs.txt"));
          pepnovo_files.append("PepNovo_PTMs.txt");
          p_novo_infile.getModifications(mods_and_keys);

          for (std::map<String, String>::const_iterator key_it=mods_and_keys.begin(); key_it!=mods_and_keys.end();++key_it)
          {
            if (ptm_command!="")
            {
              ptm_command+=":";
            }
            ptm_command+= key_it->first;
            //key_to_id[key_it->second]=key_it->first;
          }
        }

        //-------------------------------------------------------------
        // (3) running program according to parameters
        //-------------------------------------------------------------
        QStringList arguments;

        arguments << "-file" << mgf_file.toQString();
        arguments << "-model" << model_name.toQString();
        if (pm_tolerance != -1 ) arguments << "-pm_tolerance"<<String(pm_tolerance).toQString();
        if (fragment_tolerance != -1 ) arguments << "-fragment_tolerance" <<String(fragment_tolerance).toQString();
        if (!ptm_command.empty()) arguments <<"-PTMs" <<ptm_command.toQString();
        if (getFlag_("correct_pm")) arguments << "-correct_pm";
        if (getFlag_("use_spectrum_charge")) arguments << "-use_spectrum_charge";
        if (getFlag_("use_spectrum_mz")) arguments << "-use_spectrum_mz";
        if (getFlag_("no_quality_filter")) arguments << "-no_quality_filter";
        arguments << "-digest" << digest.toQString();
        arguments << "-num_solutions" << String(num_solutions).toQString();
        if (tag_length!=-1) arguments<<"-tag_length" << String(tag_length).toQString();
        arguments<<"-model_dir" << tmp_models_dir.toQString();
        //arguments<<">" << temp_pepnovo_outfile.toQString();

        writeDebug_("Use this line to call PepNovo: ", 1);
        writeDebug_(pepnovo_executable + " " + String(arguments.join(" ")), 1);
        QProcess process;
        process.setStandardOutputFile(temp_pepnovo_outfile.toQString());
        process.setStandardErrorFile(temp_pepnovo_outfile.toQString());
        process.start(pepnovo_executable.toQString(), arguments); // does automatic escaping etc...
        if (process.waitForFinished(-1))
        {
          //if PepNovo finished successfully use PepNovoOutfile to parse the results and generate idXML
          std::vector< PeptideIdentification > peptide_identifications;
          ProteinIdentification protein_identification;
          StringList ms_runs;
          exp.getPrimaryMSRunPath(ms_runs);
          protein_identification.setPrimaryMSRunPath(ms_runs);

          PepNovoOutfile p_novo_outfile;

          //resolve PTMs (match them back to the OpenMs Identifier String)
          std::vector<ProteinIdentification>prot_ids;
          p_novo_outfile.load(temp_pepnovo_outfile, peptide_identifications, protein_identification, -1e5, index_to_precursor, mods_and_keys);
          prot_ids.push_back(protein_identification);
          IdXMLFile().store(outputfile_name, prot_ids, peptide_identifications);
        }

        if (process.exitStatus() != 0)  error = true;
       
      }
      catch(Exception::BaseException &exc)
      {
        writeLog_(exc.what());
        LOG_ERROR << "Error occurred: " << exc.what() << std::endl;
        error = true;
      }
      
      if (!error)
      {
        File::removeDirRecursively(temp_data_directory);
        return EXECUTION_OK;
      }
      else
      {
        writeLog_("PepNovo problem. Aborting! (Details can be seen in outfiles: '" + temp_data_directory + "')");
        return EXTERNAL_PROGRAM_ERROR;
      }

    }
  ExitCodes main_(int, const char**)
  {
    // parsing parameters
    String in(getStringOption_("in"));
    String feature_in(getStringOption_("feature_in"));
    String out(getStringOption_("out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

    // reading input
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    PeakMap exp;
    fh.loadExperiment(in, exp, in_type, log_type_, false, false);
    exp.sortSpectra();

    FeatureMap feature_map;
    if (feature_in != "")
    {
      FeatureXMLFile().load(feature_in, feature_map);
    }

    // calculations
    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", getIntOption_("max_charge"));
    ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    PeakMap exp2 = exp;
    exp2.clear(false);
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->size() != 0)
      {
        exp2.addSpectrum(*it);
      }
    }

    exp = exp2;
    exp.updateRanges();

    // TODO check MS2 and MS1 counts
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);
    progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      progresslogger.setProgress(exp.end() - it);
      if (it->getMSLevel() != 2)
      {
        continue;
      }
      // find first MS1 scan of the MS/MS scan
      PeakMap::Iterator ms1_it = it;
      while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
      {
        --ms1_it;
      }
      if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
      {
        writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
        continue;
      }
      if (ms1_it->size() == 0)
      {
        writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
        continue;
      }

      PeakMap::Iterator ms2_it = ms1_it;
      ++ms2_it;

      while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
      {
        // first: error checks
        if (ms2_it->getPrecursors().empty())
        {
          writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
          ++ms2_it;
          continue;
        }
        else if (ms2_it->getPrecursors().size() > 1)
        {
          writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
        }

        Precursor prec = *ms2_it->getPrecursors().begin();
        double prec_pos = prec.getMZ();

        PeakMap new_exp;
        // now excise small region from the MS1 spec for the feature finder (isotope pattern must be covered...)
        PeakSpectrum zoom_spec;
        for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
        {
          if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3)
          {
            zoom_spec.push_back(*pit);
          }
        }
        new_exp.addSpectrum(zoom_spec);
        new_exp.updateRanges();
        FeatureMap features, seeds;
        ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
        if (features.empty())
        {
          writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
          ++ms2_it;
          continue;
        }

        double max_int(numeric_limits<double>::min());
        double min_dist(numeric_limits<double>::max());
        Size max_int_feat_idx(0);

        for (Size i = 0; i != features.size(); ++i)
        {
          if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance &&
              features[i].getIntensity() > max_int)
          {
            max_int_feat_idx = i;
            max_int = features[i].getIntensity();
            min_dist = fabs(features[i].getMZ() - prec_pos);
          }
        }


        writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
        if (min_dist < precursor_mass_tolerance)
        {
          prec.setMZ(features[max_int_feat_idx].getMZ());
          prec.setCharge(features[max_int_feat_idx].getCharge());
          vector<Precursor> precs;
          precs.push_back(prec);
          ms2_it->setPrecursors(precs);
          writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
        }

        ++ms2_it;
      }
      it = --ms2_it;
    }
    progresslogger.endProgress();

    // writing output
    fh.storeExperiment(out, exp, log_type_);

    return EXECUTION_OK;
  }
Example #20
0
  void MassTraceDetection::run_(const MapIdxSortedByInt& chrom_apices,
                                const Size total_peak_count, 
                                const PeakMap& work_exp, 
                                const std::vector<Size>& spec_offsets,
                                std::vector<MassTrace>& found_masstraces)
  {
    boost::dynamic_bitset<> peak_visited(total_peak_count);
    Size trace_number(1);

    // check presence of FWHM meta data
    int fwhm_meta_idx(-1);
    Size fwhm_meta_count(0);
    for (Size i = 0; i < work_exp.size(); ++i)
    {
      if (work_exp[i].getFloatDataArrays().size() > 0 && 
          work_exp[i].getFloatDataArrays()[0].getName() == "FWHM_ppm")
      {
        if (work_exp[i].getFloatDataArrays()[0].size() != work_exp[i].size()) 
        { // float data should always have the same size as the corresponding array
          throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, work_exp[i].size());
        }
        fwhm_meta_idx = 0;
        ++fwhm_meta_count;
      }
    }    
    if (fwhm_meta_count > 0 && fwhm_meta_count != work_exp.size())
    {
      throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                    String("FWHM meta arrays are expected to be missing or present for all MS spectra [") + fwhm_meta_count + "/" + work_exp.size() + "].");
    }
     

    this->startProgress(0, total_peak_count, "mass trace detection");
    Size peaks_detected(0);

    for (MapIdxSortedByInt::const_reverse_iterator m_it = chrom_apices.rbegin(); m_it != chrom_apices.rend(); ++m_it)
    {
      Size apex_scan_idx(m_it->second.first);
      Size apex_peak_idx(m_it->second.second);

      if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx])
      {
        continue;
      }

      Peak2D apex_peak;
      apex_peak.setRT(work_exp[apex_scan_idx].getRT());
      apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ());
      apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity());

      Size trace_up_idx(apex_scan_idx);
      Size trace_down_idx(apex_scan_idx);

      std::list<PeakType> current_trace;
      current_trace.push_back(apex_peak);
      std::vector<double> fwhms_mz; // peak-FWHM meta values of collected peaks

      // Initialization for the iterative version of weighted m/z mean calculation
      double centroid_mz(apex_peak.getMZ());
      double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ());
      double prev_denom(apex_peak.getIntensity());

      updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom);

      std::vector<std::pair<Size, Size> > gathered_idx;
      gathered_idx.push_back(std::make_pair(apex_scan_idx, apex_peak_idx));
      if (fwhm_meta_idx != -1)
      {
        fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]);
      }

      Size up_hitting_peak(0), down_hitting_peak(0);
      Size up_scan_counter(0), down_scan_counter(0);

      bool toggle_up = true, toggle_down = true;

      Size conseq_missed_peak_up(0), conseq_missed_peak_down(0);
      Size max_consecutive_missing(trace_termination_outliers_);

      double current_sample_rate(1.0);
      // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10));
      Size min_scans_to_consider(5);

      // double outlier_ratio(0.3);

      // double ftl_mean(centroid_mz);
      double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_);
      double intensity_so_far(apex_peak.getIntensity());

      while (((trace_down_idx > 0) && toggle_down) ||
             ((trace_up_idx < work_exp.size() - 1) && toggle_up)
             )
      {
        // *********************************************************** //
        // Step 2.1 MOVE DOWN in RT dim
        // *********************************************************** //
        if ((trace_down_idx > 0) && toggle_down)
        {
          const MSSpectrum<>& spec_trace_down = work_exp[trace_down_idx - 1];
          if (!spec_trace_down.empty())
          {
            Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz);
            double next_down_peak_mz = spec_trace_down[next_down_peak_idx].getMZ();
            double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity();

            double right_bound = centroid_mz + 3 * ftl_sd;
            double left_bound = centroid_mz - 3 * ftl_sd;

            if ((next_down_peak_mz <= right_bound) &&
                (next_down_peak_mz >= left_bound) &&
                !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx]
                )
            {
              Peak2D next_peak;
              next_peak.setRT(spec_trace_down.getRT());
              next_peak.setMZ(next_down_peak_mz);
              next_peak.setIntensity(next_down_peak_int);

              current_trace.push_front(next_peak);
              // FWHM average
              if (fwhm_meta_idx != -1)
              {
                fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]);
              }
              // Update the m/z mean of the current trace as we added a new peak
              updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom);
              gathered_idx.push_back(std::make_pair(trace_down_idx - 1, next_down_peak_idx));

              // Update the m/z variance dynamically
              if (reestimate_mt_sd_)           //  && (down_hitting_peak+1 > min_flank_scans))
              {
                // if (ftl_t > min_fwhm_scans)
                {
                  updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far);
                }
              }

              ++down_hitting_peak;
              conseq_missed_peak_down = 0;
            }
            else
            {
              ++conseq_missed_peak_down;
            }

          }
          --trace_down_idx;
          ++down_scan_counter;

          // trace termination criterion: max allowed number of
          // consecutive outliers reached OR cancel extension if
          // sampling_rate falls below min_sample_rate_
          if (trace_termination_criterion_ == "outlier")
          {
            if (conseq_missed_peak_down > max_consecutive_missing)
            {
              toggle_down = false;
            }
          }
          else if (trace_termination_criterion_ == "sample_rate")
          {
            current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) /
                                  (double)(down_scan_counter + up_scan_counter + 1);
            if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_)
            {
              // std::cout << "stopping down..." << std::endl;
              toggle_down = false;
            }
          }
        }

        // *********************************************************** //
        // Step 2.2 MOVE UP in RT dim
        // *********************************************************** //
        if ((trace_up_idx < work_exp.size() - 1) && toggle_up)
        {
          const MSSpectrum<>& spec_trace_up = work_exp[trace_up_idx + 1];
          if (!spec_trace_up.empty())
          {
            Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz);
            double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ();
            double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity();

            double right_bound = centroid_mz + 3 * ftl_sd;
            double left_bound = centroid_mz - 3 * ftl_sd;

            if ((next_up_peak_mz <= right_bound) &&
                (next_up_peak_mz >= left_bound) &&
                !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx])
            {
              Peak2D next_peak;
              next_peak.setRT(spec_trace_up.getRT());
              next_peak.setMZ(next_up_peak_mz);
              next_peak.setIntensity(next_up_peak_int);

              current_trace.push_back(next_peak);
              if (fwhm_meta_idx != -1)
              {
                fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]);
              }
              // Update the m/z mean of the current trace as we added a new peak
              updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom);
              gathered_idx.push_back(std::make_pair(trace_up_idx + 1, next_up_peak_idx));

              // Update the m/z variance dynamically
              if (reestimate_mt_sd_)           //  && (up_hitting_peak+1 > min_flank_scans))
              {
                // if (ftl_t > min_fwhm_scans)
                {
                  updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far);
                }
              }

              ++up_hitting_peak;
              conseq_missed_peak_up = 0;

            }
            else
            {
              ++conseq_missed_peak_up;
            }

          }

          ++trace_up_idx;
          ++up_scan_counter;

          if (trace_termination_criterion_ == "outlier")
          {
            if (conseq_missed_peak_up > max_consecutive_missing)
            {
              toggle_up = false;
            }
          }
          else if (trace_termination_criterion_ == "sample_rate")
          {
            current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1);

            if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_)
            {
              // std::cout << "stopping up" << std::endl;
              toggle_up = false;
            }
          }


        }

      }

      // std::cout << "current sr: " << current_sample_rate << std::endl;
      double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up);

      double mt_quality((double)current_trace.size() / (double)num_scans);
      // std::cout << "mt quality: " << mt_quality << std::endl;
      double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT()));

      // *********************************************************** //
      // Step 2.3 check if minimum length and quality of mass trace criteria are met
      // *********************************************************** //
      bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_);
      if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_)
      {
        // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl;

        // mark all peaks as visited
        for (Size i = 0; i < gathered_idx.size(); ++i)
        {
          peak_visited[spec_offsets[gathered_idx[i].first] +  gathered_idx[i].second] = true;
        }

        // create new MassTrace object and store collected peaks from list current_trace
        MassTrace new_trace(current_trace);
        new_trace.updateWeightedMeanRT();
        new_trace.updateWeightedMeanMZ();
        if (!fwhms_mz.empty()) new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end());
        new_trace.setQuantMethod(quant_method_);
        //new_trace.setCentroidSD(ftl_sd);
        new_trace.updateWeightedMZsd();
        new_trace.setLabel("T" + String(trace_number));
        ++trace_number;

        found_masstraces.push_back(new_trace);

        peaks_detected += new_trace.getSize();
        this->setProgress(peaks_detected);
      }
    }

    this->endProgress();

  }
Example #21
0
}
END_SECTION

START_SECTION(virtual ~MascotGenericFile())
{
  delete ptr;
}
END_SECTION

ptr = new MascotGenericFile();

START_SECTION((template < typename MapType > void load(const String &filename, MapType &exp)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);
  TEST_EQUAL(exp.size(), 1)

  TEST_EQUAL(exp.begin()->size(), 9)
}
END_SECTION

START_SECTION((void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact = false)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);
  
  // handling of modifications:
  Param params = ptr->getParameters();
  params.setValue("fixed_modifications", ListUtils::create<String>("Carbamidomethyl (C),Phospho (S)"));
  params.setValue("variable_modifications", ListUtils::create<String>("Oxidation (M),Deamidated (N),Deamidated (Q)"));
  ptr->setParameters(params);
Example #22
0
  ExitCodes main_(int, const char**) override
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input file names
    String in = getStringOption_("in");
    String read_method = getStringOption_("read_method");
    bool load_data = getStringOption_("loadData") == "true";

    if (read_method == "streaming")
    {
      std::cout << "Read method: streaming" << std::endl;

      // Create the consumer, set output file name, transform
      TICConsumer consumer;
      MzMLFile mzml;
      mzml.setLogType(log_type_);

      PeakFileOptions opt = mzml.getOptions();
      opt.setFillData(load_data); // whether to actually load any data
      opt.setSkipXMLChecks(true); // save time by not checking base64 strings for whitespaces 
      opt.setMaxDataPoolSize(100);
      opt.setAlwaysAppendData(false);
      mzml.setOptions(opt);
      mzml.transform(in, &consumer, true, true);

      std::cout << "There are " << consumer.nr_spectra << " spectra and " << consumer.nr_peaks << " peaks in the input file." << std::endl;
      std::cout << "The total ion current is " << consumer.TIC << std::endl;
      size_t after;
      SysInfo::getProcessMemoryConsumption(after);
      std::cout << " Memory consumption after " << after << std::endl;
    }
    else if (read_method == "regular")
    {
      std::cout << "Read method: regular" << std::endl;

      MzMLFile mzml;
      mzml.setLogType(log_type_);
      PeakFileOptions opt = mzml.getOptions();
      opt.setFillData(load_data); // whether to actually load any data
      opt.setSkipXMLChecks(true); // save time by not checking base64 strings for whitespaces 
      mzml.setOptions(opt);
      PeakMap map;
      mzml.load(in, map);
      double TIC = 0.0;
      long int nr_peaks = 0;
      for (Size i =0; i < map.size(); i++)
      {
        nr_peaks += map[i].size();
        for (Size j = 0; j < map[i].size(); j++)
        {
          TIC += map[i][j].getIntensity();
        }
      }

      std::cout << "There are " << map.size() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl;
      std::cout << "The total ion current is " << TIC << std::endl;
      size_t after;
      SysInfo::getProcessMemoryConsumption(after);
      std::cout << " Memory consumption after " << after << std::endl;
    }
    else if (read_method == "indexed")
    {
      std::cout << "Read method: indexed" << std::endl;
      
      IndexedMzMLFileLoader imzml;
      // load data from an indexed MzML file
      OnDiscPeakMap map;
      imzml.load(in, map);
      double TIC = 0.0;
      long int nr_peaks = 0;
      if (load_data)
      {
        for (Size i =0; i < map.getNrSpectra(); i++)
        {
          OpenMS::Interfaces::SpectrumPtr sptr = map.getSpectrumById(i);

          nr_peaks += sptr->getIntensityArray()->data.size();
          TIC += std::accumulate(sptr->getIntensityArray()->data.begin(), sptr->getIntensityArray()->data.end(), 0.0);
        }
      }

      std::cout << "There are " << map.getNrSpectra() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl;
      std::cout << "The total ion current is " << TIC << std::endl;
      size_t after;
      SysInfo::getProcessMemoryConsumption(after);
      std::cout << " Memory consumption after " << after << std::endl;
    }
    else if (read_method == "indexed_parallel")
    {
      std::cout << "Read method: indexed (parallel)" << std::endl;
      
      IndexedMzMLFileLoader imzml;
      PeakFileOptions opt = imzml.getOptions();
      opt.setFillData(load_data); // whether to actually load any data
      imzml.setOptions(opt);

      // load data from an indexed MzML file
      OnDiscPeakMap map;
      map.openFile(in, true);
      map.setSkipXMLChecks(true);

      double TIC = 0.0;
      long int nr_peaks = 0;

      if (load_data)
      {

        // firstprivate means that each thread has its own instance of the
        // variable, each copy initialized with the initial value 
#ifdef _OPENMP
#pragma omp parallel for firstprivate(map) 
#endif
        for (SignedSize i =0; i < (SignedSize)map.getNrSpectra(); i++)
        {
          OpenMS::Interfaces::SpectrumPtr sptr = map.getSpectrumById(i);
          double nr_peaks_l = sptr->getIntensityArray()->data.size();
          double TIC_l = std::accumulate(sptr->getIntensityArray()->data.begin(), sptr->getIntensityArray()->data.end(), 0.0);
#ifdef _OPENMP
#pragma omp critical (indexed)
#endif
          {
            TIC += TIC_l;
            nr_peaks += nr_peaks_l;
          }
        }

      }

      std::cout << "There are " << map.getNrSpectra() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl;
      std::cout << "The total ion current is " << TIC << std::endl;
      size_t after;
      SysInfo::getProcessMemoryConsumption(after);
      std::cout << " Memory consumption after " << after << std::endl;
    }
    else if (read_method == "cached")
    {
      std::cout << "Read method: cached" << std::endl;

      // Special handling of cached mzML as input types: 
      // we expect two paired input files which we should read into exp
      std::vector<String> split_out;
      in.split(".cachedMzML", split_out);
      if (split_out.size() != 2)
      {
        LOG_ERROR << "Cannot deduce base path from input '" << in << 
          "' (note that '.cachedMzML' should only occur once as the final ending)" << std::endl;
        return ILLEGAL_PARAMETERS;
      }
      String in_meta = split_out[0] + ".mzML";

      MzMLFile f;
      f.setLogType(log_type_);
      CachedmzML cacher;
      cacher.setLogType(log_type_);

      CachedmzML cache;
      cache.createMemdumpIndex(in);
      const std::vector<std::streampos> spectra_index = cache.getSpectraIndex();

      std::ifstream ifs_;
      ifs_.open(in.c_str(), std::ios::binary);

      double TIC = 0.0;
      long int nr_peaks = 0;
      for (Size i=0; i < spectra_index.size(); ++i)
      {

        BinaryDataArrayPtr mz_array(new BinaryDataArray);
        BinaryDataArrayPtr intensity_array(new BinaryDataArray);
        int ms_level = -1;
        double rt = -1.0;
        ifs_.seekg(spectra_index[i]);
        CachedmzML::readSpectrumFast(mz_array, intensity_array, ifs_, ms_level, rt);

        nr_peaks += intensity_array->data.size();
        for (Size j = 0; j < intensity_array->data.size(); j++)
        {
          TIC += intensity_array->data[j];
        }
      }

      std::cout << "There are " << spectra_index.size() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl;
      std::cout << "The total ion current is " << TIC << std::endl;
      size_t after;
      SysInfo::getProcessMemoryConsumption(after);
      std::cout << " Memory consumption after " << after << std::endl;
    }
    else if (read_method == "cached_parallel")
    {
      std::cout << "Read method: cached parallel" << std::endl;

      // Special handling of cached mzML as input types: 
      // we expect two paired input files which we should read into exp
      std::vector<String> split_out;
      in.split(".cachedMzML", split_out);
      if (split_out.size() != 2)
      {
        LOG_ERROR << "Cannot deduce base path from input '" << in << 
          "' (note that '.cachedMzML' should only occur once as the final ending)" << std::endl;
        return ILLEGAL_PARAMETERS;
      }
      String in_meta = split_out[0] + ".mzML";

      MzMLFile f;
      f.setLogType(log_type_);
      CachedmzML cacher;
      cacher.setLogType(log_type_);

      CachedmzML cache;
      cache.createMemdumpIndex(in);
      const std::vector<std::streampos> spectra_index = cache.getSpectraIndex();

      FileAbstraction filestream(in);

      double TIC = 0.0;
      long int nr_peaks = 0;
#ifdef _OPENMP
#pragma omp parallel for firstprivate(filestream) 
#endif
      for (SignedSize i=0; i < (SignedSize)spectra_index.size(); ++i)
      {
        BinaryDataArrayPtr mz_array(new BinaryDataArray);
        BinaryDataArrayPtr intensity_array(new BinaryDataArray);
        int ms_level = -1;
        double rt = -1.0;
        // we only change the position of the thread-local filestream
        filestream.getStream().seekg(spectra_index[i]);
        CachedmzML::readSpectrumFast(mz_array, intensity_array, filestream.getStream(), ms_level, rt);

        double nr_peaks_l = intensity_array->data.size();
        double TIC_l = std::accumulate(intensity_array->data.begin(), intensity_array->data.end(), 0.0);
#ifdef _OPENMP
#pragma omp critical (indexed)
#endif
        {
          TIC += TIC_l;
          nr_peaks += nr_peaks_l;
        }
      }

      std::cout << "There are " << spectra_index.size() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl;
      std::cout << "The total ion current is " << TIC << std::endl;
      size_t after;
      SysInfo::getProcessMemoryConsumption(after);
      std::cout << " Memory consumption after " << after << std::endl;
    }

    return EXECUTION_OK;
  }