Пример #1
  /**
    @brief Tool entry point: generates decoy transitions for a TraML assay
           library and writes them to the output file.

    Reads all tool options, splits the comma-separated fragment type and
    fragment charge lists, loads the target TraML file given by "in", lets
    MRMDecoy::generateDecoys() build the decoys and stores the result in "out".
    If the "append" flag is set, targets and decoys are merged and the merged
    experiment is stored; otherwise only the decoys are stored.

    @return EXECUTION_OK after the output file has been written.
    @throws Exception::IllegalArgument if "method" is not one of "shuffle",
            "pseudo-reverse", "reverse" or "shift".
  */
  ExitCodes main_(int, const char **)
  {
    String in = getStringOption_("in");
    String out = getStringOption_("out");
    String method = getStringOption_("method");
    String decoy_tag = getStringOption_("decoy_tag");
    double mz_threshold = getDoubleOption_("mz_threshold");
    bool exclude_similar = getFlag_("exclude_similar");
    double similarity_threshold = getDoubleOption_("similarity_threshold");
    bool append = getFlag_("append");
    bool remove_CNterm_mods = getFlag_("remove_CNterm_mods");
    bool remove_unannotated = getFlag_("remove_unannotated");
    double identity_threshold = getDoubleOption_("identity_threshold");
    Int max_attempts = getIntOption_("max_attempts");
    double mz_shift = getDoubleOption_("mz_shift");
    double precursor_mass_shift = getDoubleOption_("precursor_mass_shift");
    String allowed_fragment_types_string = getStringOption_("allowed_fragment_types");
    String allowed_fragment_charges_string = getStringOption_("allowed_fragment_charges");
    bool enable_detection_specific_losses = getFlag_("enable_detection_specific_losses");
    bool enable_detection_unspecific_losses = getFlag_("enable_detection_unspecific_losses");

    // comma-separated list of fragment ion types
    std::vector<String> allowed_fragment_types;
    allowed_fragment_types_string.split(",", allowed_fragment_types);

    // comma-separated list of fragment charges, converted to numbers
    std::vector<String> allowed_fragment_charges_string_vector;
    allowed_fragment_charges_string.split(",", allowed_fragment_charges_string_vector);

    std::vector<size_t> allowed_fragment_charges;
    allowed_fragment_charges.reserve(allowed_fragment_charges_string_vector.size());
    for (size_t i = 0; i < allowed_fragment_charges_string_vector.size(); i++)
    {
      size_t charge = std::atoi(allowed_fragment_charges_string_vector.at(i).c_str());
      // every parsed charge has to be kept; otherwise generateDecoys() would
      // always receive an empty charge list
      allowed_fragment_charges.push_back(charge);
    }

    if (method != "shuffle" && method != "pseudo-reverse" && method != "reverse" && method != "shift")
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "No valid decoy generation method selected!");
    }

    TraMLFile traml;
    TargetedExperiment targeted_exp;
    TargetedExperiment targeted_decoy;

    std::cout << "Loading " << in << std::endl;
    traml.load(in, targeted_exp);

    MRMDecoy decoys;

    std::cout << "Generate decoys" << std::endl;
    decoys.generateDecoys(targeted_exp, targeted_decoy, method, decoy_tag, identity_threshold, max_attempts, mz_threshold, mz_shift, exclude_similar, similarity_threshold, remove_CNterm_mods, precursor_mass_shift, allowed_fragment_types, allowed_fragment_charges, enable_detection_specific_losses, enable_detection_unspecific_losses, remove_unannotated);

    // Exactly one store per run: both variants write to the same path, so
    // storing the decoys unconditionally would overwrite the merged file.
    if (append)
    {
      TargetedExperiment targeted_merged;
      targeted_merged += targeted_exp + targeted_decoy;
      traml.store(out, targeted_merged);
    }
    else
    {
      traml.store(out, targeted_decoy);
    }
    return EXECUTION_OK;
  }
Пример #2
  // Tool entry point: merges every file listed in the "in" option into the
  // single file named by the "out" option. The branch taken depends on
  // force_type: featureXML, consensusXML and TraML inputs are merged with
  // operator+= on the respective container; the remaining code loads each file
  // through FileHandler::loadExperiment() into an MSExperiment and stores the
  // result as mzML.
  // Returns ILLEGAL_PARAMETERS when "raw:rt_custom" is non-empty but its size
  // differs from the number of input files, EXECUTION_OK at the end.
  // NOTE(review): this listing has no braces and some statements look dropped
  // (`else` keywords, a `try`, several if/for bodies). The nesting described
  // in the comments below is inferred from indentation only -- confirm against
  // the original file before relying on it.
  ExitCodes main_(int, const char**)

    // parameter handling
    //file list
    StringList file_list = getStringList_("in");

    //file type
    FileHandler fh;
    FileTypes::Type force_type;
    if (getStringOption_("in_type").size() > 0)
      force_type = FileTypes::nameToType(getStringOption_("in_type"));
      // NOTE(review): presumably the `else` branch (no "in_type" given): take
      // the type of the first input file -- the `else` itself is not visible.
      force_type = fh.getType(file_list[0]);

    //output file names and types
    String out_file = getStringOption_("out");

    // calculations

    bool annotate_file_origin =  getFlag_("annotate_file_origin");

    if (force_type == FileTypes::FEATUREXML)
      FeatureMap<> out;
      for (Size i = 0; i < file_list.size(); ++i)
        FeatureMap<> map;
        // local FeatureXMLFile named `fh` hides the FileHandler `fh` declared above
        FeatureXMLFile fh;
        fh.load(file_list[i], map);

        // optionally tag every feature with the name of the file it came from
        if (annotate_file_origin)
          for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it)
            it->setMetaValue("file_origin", DataValue(file_list[i]));
        out += map;

      // writing output

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      FeatureXMLFile f;
      f.store(out_file, out);

    else if (force_type == FileTypes::CONSENSUSXML)
      ConsensusMap out;
      ConsensusXMLFile fh;
      // the first file is loaded straight into the output map, so its elements
      // never pass through the "file_origin" annotation in the loop below
      fh.load(file_list[0], out);
      //skip first file
      for (Size i = 1; i < file_list.size(); ++i)
        ConsensusMap map;
        ConsensusXMLFile fh;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
          for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
            it->setMetaValue("file_origin", DataValue(file_list[i]));
        out += map;

      // writing output

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      ConsensusXMLFile f;
      f.store(out_file, out);
    else if (force_type == FileTypes::TRAML)
      TargetedExperiment out;
      for (Size i = 0; i < file_list.size(); ++i)
        TargetedExperiment map;
        TraMLFile fh;
        fh.load(file_list[i], map);
        out += map;

      // writing output

      //annotate output with data processing info
      // NOTE(review): `software` is declared but not used in the visible code
      Software software;

      TraMLFile f;
      f.store(out_file, out);
      // NOTE(review): from here on this is presumably the final `else` branch
      // (raw data such as mzML); the `else` keyword is not visible.
      // we might want to combine different types, thus we only
      // query in_type (which applies to all files)
      // and not the suffix or content of a single file
      force_type = FileTypes::nameToType(getStringOption_("in_type"));

      // retention time assignment mode: automatic numbering, parsed from the
      // file name, or taken from a user-supplied list (one value per input file)
      bool rt_auto_number = getFlag_("raw:rt_auto");
      bool rt_filename = getFlag_("raw:rt_filename");
      bool rt_custom = false;
      DoubleList custom_rts = getDoubleList_("raw:rt_custom");
      if (custom_rts.size() != 0)
        rt_custom = true;
        if (custom_rts.size() != file_list.size())
          writeLog_("Custom retention time list must have as many elements as there are input files!");
          return ILLEGAL_PARAMETERS;

      //ms level
      bool user_ms_level = getFlag_("raw:user_ms_level");

      MSExperiment<> out;
      UInt rt_auto = 0;
      UInt native_id = 0;
      std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms;
      for (Size i = 0; i < file_list.size(); ++i)
        String filename = file_list[i];

        //load file
        MSExperiment<> in;
        fh.loadExperiment(filename, in, force_type, log_type_);
        if (in.empty() && in.getChromatograms().empty())
          writeLog_(String("Warning: Empty file '") + filename + "'!");
        out.reserve(out.size() + in.size());

        //warn if custom RT and more than one scan in input file
        if (rt_custom && in.size() > 1)
          writeLog_(String("Warning: More than one scan in file '") + filename + "'! All scans will have the same retention time!");

        for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2)
          //handle rt
          Real rt_final = it2->getRT();
          if (rt_auto_number)
            // pre-increment: automatically assigned retention times start at 1
            rt_final = ++rt_auto;
          else if (rt_custom)
            rt_final = custom_rts[i];
          else if (rt_filename)
            if (!filename.hasSubstring("rt"))
              writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'"));
            // scan the file name for "rt" followed by a digit and collect the
            // digits/dots that follow; this inner `i` shadows the file index `i`
            for (Size i = 0; i < filename.size(); ++i)
              if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i]))
                String rt;
                while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i])))
                  rt += filename[i++];
                if (rt.size() > 0)
                  // remove dot from rt3892.98.dta
                  //                          ^
                  if (rt[rt.size() - 1] == '.')
                    // remove last character
                    rt.erase(rt.end() - 1);
                  float tmp = rt.toFloat();
                  rt_final = tmp;
                // NOTE(review): the matching `try` is not visible in this listing
                catch (Exception::ConversionError)
                  writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'."));

          // none of the rt methods were successful
          // (exact comparison with -1; presumably -1 is the "RT not set" value -- TODO confirm)
          if (rt_final == -1)
            writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'");


          // NOTE(review): the body of this `if` (and the code that appends the
          // spectrum to `out`) is not visible in this listing
          if (user_ms_level)

        // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles)
        if (in.size() == 1)
          in.getSourceFiles().clear();   // delete source file annotated from source file (it's in the spectrum anyways)
        // copy experimental settings from first file
        // NOTE(review): the body of the `i == 0` branch is not visible
        if (i == 0)
        else // otherwise append
          out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be empty if spectrum was annotated above, but that's ok then

        // also add the chromatograms
        // NOTE(review): loop body not visible (presumably fills all_chromatograms)
        for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2)

      // set the chromatograms

      // writing output

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      MzMLFile f;
      f.store(out_file, out);


    return EXECUTION_OK;
  // Tool entry point: loads the TraML file given by "tr", converts it to an
  // OpenSwath::LightTargetedExperiment and then, for every file in
  // "swath_files", loads the mzML map, selects the transitions that fall into
  // the isolation window of the first spectrum's first precursor, and runs
  // DiaPrescore on them. Results go through a CSVWriter to a file named after
  // the input with its extension replaced by ".tsv".
  // Returns EXECUTION_OK.
  // NOTE(review): most braces are missing in this listing and one call is
  // truncated; nesting is inferred from indentation -- confirm against the
  // original file.
  ExitCodes main_(int, const char**)
    OpenMS::StringList file_list = getStringList_("swath_files");
    std::string tr_file = getStringOption_("tr");
    std::cout << tr_file << std::endl;
    //std::string out = getStringOption_("out");
    //std::cout << out << std::endl;
    double min_upper_edge_dist = getDoubleOption_("min_upper_edge_dist");

    // If we have a transformation file, trafo will transform the RT in the
    // scoring according to the model. If we don't have one, it will apply the
    // null transformation.
    // NOTE(review): no transformation is loaded in the visible code, and
    // feature_finder_param is not used below.
    Param feature_finder_param = getParam_().copy("algorithm:", true);

    // Create the output map, load the input TraML file and the chromatograms
    MapType exp;
    OpenSwath::LightTargetedExperiment transition_exp;

    std::cout << "Loading TraML file" << std::endl;
      // load the full TraML representation, then convert it to the light-weight one
      OpenMS::TargetedExperiment transition_exp_;
      TraMLFile t;
      t.load(tr_file, transition_exp_);
      //int pept =  transition_exp_.getPeptides().size();
      //int prot = transition_exp_.getProteins().size();
      //int trans = transition_exp_.getTransitions().size();
      OpenSwathDataAccessHelper::convertTargetedExp(transition_exp_, transition_exp);
      int ltrans = transition_exp.transitions.size();

      // print the number of transitions after conversion
      std::cout << ltrans << std::endl;
    // Here we deal with SWATH files (can be multiple files)

    for (Size i = 0; i < file_list.size(); ++i)
      MzMLFile swath_file;
      MapTypePtr swath_map (new MapType);
      FeatureMap featureFile;
      std::cout << "Loading file " << file_list[i] << std::endl;

      // no progress log on the console in parallel

      std::string fileout = file_list[i];

      /// Returns the basename of the file (without the path).
      /// Returns the path of the file (without the file name).

      //boost::filesystem::path x(fileout);
      //boost::filesystem::path y = x.parent_path()  ;
      //std::string fname = x.stem().string();

      //std::string tmp = File.basename(fileout);
      // output name: input file name with its extension replaced by ".tsv"
      std::string fname = File::removeExtension(fileout);
      fname += ".tsv";

      swath_file.load(file_list[i], *swath_map);
      // NOTE(review): as shown, only a warning is printed here; the code below
      // still indexes (*swath_map)[0] and prec[0]. Verify whether a
      // `continue`/`return` was dropped from this listing.
      if (swath_map->size() == 0 || (*swath_map)[0].getPrecursors().size() == 0)
        std::cerr << "WARNING: File " << swath_map->getLoadedFilePath()
                  << " does not have any experiments or any precursors. Is it a SWATH map?"
                  << std::endl;
      // Find the transitions to extract and extract them
      OpenSwath::LightTargetedExperiment transition_exp_used;
      double upper, lower;
      // isolation window bounds from the first precursor of the first spectrum
      const std::vector<Precursor> prec = (*swath_map)[0].getPrecursors();
      lower = prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset();
      upper = prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset();
      OpenSwathHelper::selectSwathTransitions(transition_exp, transition_exp_used,
                                              min_upper_edge_dist, lower, upper);
      if (transition_exp_used.getTransitions().size() == 0)
        std::cerr << "WARNING: For file " << swath_map->getLoadedFilePath()
                  << " there are no transitions to extract." << std::endl;
      //OpenMS::MRMFeatureFinderScoring::TransitionGroupMapType transition_group_map;
      std::cout << "Using Spectrum Interface!" << std::endl;
      // NOTE(review): the argument list of this call is cut off in this listing
      OpenSwath::SpectrumAccessPtr  spectrumAccess = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(

      //std::cout << "using data frame writer for storing data. Outfile :" << out << std::endl;
      // NOTE(review): raw new/delete -- if the DiaPrescore call throws, `dfw`
      // is not released; consider a scoped owner.
      OpenSwath::IDataFrameWriter* dfw = new OpenSwath::CSVWriter(fname);
      OpenMS::DiaPrescore dp;
      dp.operator()(spectrumAccess, transition_exp_used, dfw);
      delete dfw;
      //featureFinder.pickExperiment(chromatogram_ptr, out_featureFile,
      //transition_exp_used, trafo, swath_ptr, transition_group_map);
      //FeatureXMLFile().store(out, out_featureFile);
    }         //end of for loop
    return EXECUTION_OK;
  }       //end of _main
  // Tool entry point: extracts chromatograms for the transitions of the TraML
  // file given by "tr" from every mzML file listed in "in" and stores the
  // result as mzML in "out". The loop over the input files is annotated with
  // `#pragma omp parallel for` when _OPENMP is defined.
  // Returns EXECUTION_OK.
  // NOTE(review): braces, `else` keywords and the `#endif` lines matching the
  // `#ifdef _OPENMP` directives are not visible in this listing; nesting is
  // inferred from indentation -- confirm against the original file.
  ExitCodes main_(int, const char **)
    StringList file_list = getStringList_("in");
    String tr_file_str = getStringOption_("tr");
    String out = getStringOption_("out");
    bool is_swath = getFlag_("is_swath");
    bool ppm = getFlag_("ppm");
    bool extract_MS1 = getFlag_("extract_MS1");
    double min_upper_edge_dist = getDoubleOption_("min_upper_edge_dist");
    double mz_extraction_window = getDoubleOption_("mz_window");
    double rt_extraction_window = getDoubleOption_("rt_window");

    String extraction_function = getStringOption_("extraction_function");

    // If we have a transformation file, trafo will transform the RT in the
    // scoring according to the model. If we don't have one, it will apply the
    // null transformation.
    String trafo_in = getStringOption_("rt_norm");
    TransformationDescription trafo;
    if (trafo_in.size() > 0) 
      TransformationXMLFile trafoxml;

      String model_type = getStringOption_("model:type");
      Param model_params = getParam_().copy("model:", true);
      trafoxml.load(trafo_in, trafo);
      trafo.fitModel(model_type, model_params);
    // NOTE(review): as shown this is a plain copy of `trafo`; no call that
    // inverts it is visible in this listing -- confirm.
    TransformationDescription trafo_inverse = trafo;

    // points into tr_file_str, which stays alive for the rest of the function
    const char * tr_file = tr_file_str.c_str();

    MapType out_exp;
    std::vector< OpenMS::MSChromatogram > chromatograms;
    TraMLFile traml;
    OpenMS::TargetedExperiment targeted_exp;

    std::cout << "Loading TraML file" << std::endl;
    traml.load(tr_file, targeted_exp);
    std::cout << "Loaded TraML file" << std::endl;

    // Do parallelization over the different input files
    // Only in OpenMP 3.0 are unsigned loop variables allowed
#ifdef _OPENMP
#pragma omp parallel for
    for (SignedSize i = 0; i < boost::numeric_cast<SignedSize>(file_list.size()); ++i)
      boost::shared_ptr<PeakMap > exp(new PeakMap);
      MzMLFile f;
      // Logging and output to the console
      // IF_MASTERTHREAD f.setLogType(log_type_); 

      // Find the transitions to extract and extract them
      MapType tmp_out;
      OpenMS::TargetedExperiment transition_exp_used;
      f.load(file_list[i], *exp);
      if (exp->empty() ) { continue; } // if empty, go on
      OpenSwath::SpectrumAccessPtr expptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp);
      bool do_continue = true;
      if (is_swath)
        do_continue = OpenSwathHelper::checkSwathMapAndSelectTransitions(*exp, targeted_exp, transition_exp_used, min_upper_edge_dist);  
        // NOTE(review): presumably the `else` branch (not a SWATH map): use all
        // transitions -- the `else` keyword is not visible.
        transition_exp_used = targeted_exp;

#ifdef _OPENMP
#pragma omp critical (OpenSwathChromatogramExtractor_metadata)
      // after loading the first file, copy the meta data from that experiment
      // this may happen *after* chromatograms were already added to the
      // output, thus we do NOT fill the experiment here but rather store all
      // the chromatograms in the "chromatograms" array and store them in
      // out_exp afterwards.
      if (i == 0) 
        out_exp = *exp;

      std::cout << "Extracting " << transition_exp_used.getTransitions().size() << " transitions" << std::endl;
      std::vector< OpenSwath::ChromatogramPtr > chromatogram_ptrs;
      std::vector< ChromatogramExtractor::ExtractionCoordinates > coordinates;

      // continue if the map is not empty
      if (do_continue)

        // Prepare the coordinates (with or without rt extraction) and then extract the chromatograms
        ChromatogramExtractor extractor;
        if (rt_extraction_window < 0)
          extractor.prepare_coordinates(chromatogram_ptrs, coordinates, transition_exp_used, rt_extraction_window, extract_MS1);
          // NOTE(review): the following lines are presumably the `else` branch
          // (non-negative window); the `else` keyword is not visible.
          // Use an rt extraction window of 0.0 which will just write the retention time in start / end positions
          extractor.prepare_coordinates(chromatogram_ptrs, coordinates, transition_exp_used, 0.0, extract_MS1);
          // map start/end through trafo_inverse and widen by half the window on each side
          for (std::vector< ChromatogramExtractor::ExtractionCoordinates >::iterator it = coordinates.begin(); it != coordinates.end(); ++it)
            it->rt_start = trafo_inverse.apply(it->rt_start) - rt_extraction_window / 2.0;
            it->rt_end = trafo_inverse.apply(it->rt_end) + rt_extraction_window / 2.0;
        extractor.extractChromatograms(expptr, chromatogram_ptrs, coordinates, 
            mz_extraction_window, ppm, extraction_function);

#ifdef _OPENMP
#pragma omp critical (OpenSwathChromatogramExtractor_insertMS1)
          // Remove potential meta value indicating cached data
          // the settings are taken from the first spectrum of the current map
          SpectrumSettings exp_settings = (*exp)[0];
          for (Size j = 0; j < exp_settings.getDataProcessing().size(); j++)
            if (exp_settings.getDataProcessing()[j]->metaValueExists("cached_data"))
            { exp_settings.getDataProcessing()[j]->removeMetaValue("cached_data"); }
          // appends the extracted chromatograms to the shared `chromatograms` vector
          extractor.return_chromatogram(chromatogram_ptrs, coordinates, transition_exp_used, exp_settings, chromatograms, extract_MS1);

      } // end of do_continue
    } // end of loop over all files / end of OpenMP

    // TODO check that no chromatogram IDs occur multiple times !
    // store the output
    // NOTE(review): no visible line copies `chromatograms` into out_exp before
    // it is stored; the processing action recorded here is SMOOTHING -- confirm
    // both against the original file.
    MzMLFile mzf;
    addDataProcessing_(out_exp, getProcessingInfo_(DataProcessing::SMOOTHING));
    mzf.store(out, out_exp);

    return EXECUTION_OK;
Пример #5
  // Tool entry point: merges every file listed in the "in" option into the
  // single file named by the "out" option. featureXML, consensusXML and TraML
  // inputs are merged with operator+= on the respective container; all other
  // inputs are loaded through FileHandler::loadExperiment() into an
  // MSExperiment and stored as mzML. When rt_gap_ ("rt_concat:gap") is
  // positive, each loaded map is additionally passed to
  // adjustRetentionTimes_() before merging.
  // Returns ILLEGAL_PARAMETERS when "raw:rt_custom" is non-empty but its size
  // differs from the number of input files, EXECUTION_OK at the end.
  // NOTE(review): this listing has no braces and some statements look dropped
  // (`else` keywords, several if/for bodies). The nesting described in the
  // comments below is inferred from indentation only -- confirm against the
  // original file before relying on it.
  ExitCodes main_(int, const char**)

    // parameter handling
    // file list
    StringList file_list = getStringList_("in");

    // file type
    FileHandler file_handler;
    FileTypes::Type force_type;
    if (getStringOption_("in_type").size() > 0)
      force_type = FileTypes::nameToType(getStringOption_("in_type"));
      // NOTE(review): presumably the `else` branch (no "in_type" given): take
      // the type of the first input file -- the `else` itself is not visible.
      force_type = file_handler.getType(file_list[0]);

    // output file names and types
    String out_file = getStringOption_("out");

    bool annotate_file_origin =  getFlag_("annotate_file_origin");
    rt_gap_ = getDoubleOption_("rt_concat:gap");
    vector<String> trafo_out = getStringList_("rt_concat:trafo_out");
    // NOTE(review): the statement that resizes trafo_out is not visible, and
    // no `return` follows the error message below in this listing.
    if (trafo_out.empty())
      // resize now so we don't have to worry about indexing out of bounds:
    else if (trafo_out.size() != file_list.size())
      writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!");

    // calculations

    if (force_type == FileTypes::FEATUREXML)
      FeatureMap out;
      FeatureXMLFile fh;
      for (Size i = 0; i < file_list.size(); ++i)
        FeatureMap map;
        fh.load(file_list[i], map);

        // optionally tag every feature with the name of the file it came from
        if (annotate_file_origin)
          for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it)
            it->setMetaValue("file_origin", DataValue(file_list[i]));

        if (rt_gap_ > 0.0) // concatenate in RT
          adjustRetentionTimes_(map, trafo_out[i], i == 0);

        out += map;

      // writing output

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      fh.store(out_file, out);

    else if (force_type == FileTypes::CONSENSUSXML)
      ConsensusMap out;
      ConsensusXMLFile fh;
      // the first file is loaded straight into the output map, so it passes
      // through neither the annotation nor adjustRetentionTimes_() below
      fh.load(file_list[0], out);
      // skip first file
      for (Size i = 1; i < file_list.size(); ++i)
        ConsensusMap map;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
          for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
            it->setMetaValue("file_origin", DataValue(file_list[i]));

        // NOTE(review): this loop starts at i = 1, so `i == 0` is always false
        // here -- verify that this is what adjustRetentionTimes_() expects.
        if (rt_gap_ > 0.0) // concatenate in RT
          adjustRetentionTimes_(map, trafo_out[i], i == 0);

        out += map;

      // writing output

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      fh.store(out_file, out);

    else if (force_type == FileTypes::TRAML)
      TargetedExperiment out;
      TraMLFile fh;
      for (Size i = 0; i < file_list.size(); ++i)
        TargetedExperiment map;
        fh.load(file_list[i], map);
        out += map;

      // writing output

      // annotate output with data processing info
      // NOTE(review): `software` is declared but not used in the visible code
      Software software;

      fh.store(out_file, out);
    else // raw data input (e.g. mzML)
      // RT
      // retention time assignment mode: automatic numbering, parsed from the
      // file name, or taken from a user-supplied list (one value per input file)
      bool rt_auto_number = getFlag_("raw:rt_auto");
      bool rt_filename = getFlag_("raw:rt_filename");
      bool rt_custom = false;
      DoubleList custom_rts = getDoubleList_("raw:rt_custom");
      if (!custom_rts.empty())
        rt_custom = true;
        if (custom_rts.size() != file_list.size())
          writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!");
          return ILLEGAL_PARAMETERS;

      // MS level
      Int ms_level = getIntOption_("raw:ms_level");

      MSExperiment<> out;
      UInt rt_auto = 0;
      UInt native_id = 0;
      for (Size i = 0; i < file_list.size(); ++i)
        String filename = file_list[i];

        // load file
        // the file type is re-detected for every input file
        force_type = file_handler.getType(file_list[i]);
        MSExperiment<> in;
        file_handler.loadExperiment(filename, in, force_type, log_type_);

        if (in.empty() && in.getChromatograms().empty())
          writeLog_(String("Warning: Empty file '") + filename + "'!");
        out.reserve(out.size() + in.size());

        // warn if custom RT and more than one scan in input file
        if (rt_custom && in.size() > 1)
          writeLog_(String("Warning: More than one scan in file '") + filename + "'! All scans will have the same retention time!");

        // handle special raw data options:
        for (MSExperiment<>::iterator spec_it = in.begin();
             spec_it != in.end(); ++spec_it)
          float rt_final = spec_it->getRT();
          if (rt_auto_number)
            // pre-increment: automatically assigned retention times start at 1
            rt_final = ++rt_auto;
          else if (rt_custom)
            rt_final = custom_rts[i];
          else if (rt_filename)
            // matches "rt" followed by an integer or decimal number; the number
            // is capture group 1
            static const boost::regex re("rt(\\d+(\\.\\d+)?)");
            boost::smatch match;
            bool found = boost::regex_search(filename, match, re);
            if (found)
              rt_final = String(match[1]).toFloat();
              // NOTE(review): presumably the `else` branch (no match) -- the
              // `else` keyword is not visible.
              writeLog_("Warning: could not extract retention time from filename '" + filename + "'");

          // none of the rt methods were successful
          if (rt_final < 0)
            writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'");

          // NOTE(review): native_id is never incremented in the visible code,
          // and the body of the `ms_level` check below is not visible.
          spec_it->setNativeID("spectrum=" + String(native_id));
          if (ms_level > 0)

        // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles)
        if (in.size() == 1)
          in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways)

        if (rt_gap_ > 0.0) // concatenate in RT
          adjustRetentionTimes_(in, trafo_out[i], i == 0);

        // add spectra to output
        // NOTE(review): the bodies of the next two loops are not visible
        for (MSExperiment<>::const_iterator spec_it = in.begin();
             spec_it != in.end(); ++spec_it)
        // also add the chromatograms
        for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator
               chrom_it = in.getChromatograms().begin(); chrom_it != 
               in.getChromatograms().end(); ++chrom_it)

        // copy experimental settings from first file
        // NOTE(review): the body of the `i == 0` branch is not visible
        if (i == 0)
        else // otherwise append
          out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be empty if spectrum was annotated above, but that's ok then

      // writing output

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      MzMLFile f;
      f.store(out_file, out);

    return EXECUTION_OK;