void QuantitativeExperimentalDesign::mergeConsensusMaps_(ConsensusMap & out, const String & experiment, StringList & file_paths)
{
    ConsensusMap map;

    LOG_INFO << "Merge consensus maps: " << endl;
    UInt counter = 1;
    for (StringList::Iterator file_it = file_paths.begin(); file_it != file_paths.end(); ++file_it, ++counter)
    {
        //load should clear the map
        ConsensusXMLFile().load(*file_it, map);
        for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
        {
            it->setMetaValue("experiment", DataValue(experiment));
        }
        out += map;
    }
    LOG_INFO << endl;
}
Пример #2
0
  ExitCodes main_(int, const char**)
  {

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    //file list
    StringList file_list = getStringList_("in");

    //file type
    FileHandler fh;
    FileTypes::Type force_type;
    if (getStringOption_("in_type").size() > 0)
    {
      force_type = FileTypes::nameToType(getStringOption_("in_type"));
    }
    else
    {
      force_type = fh.getType(file_list[0]);
    }

    //output file names and types
    String out_file = getStringOption_("out");

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    bool annotate_file_origin =  getFlag_("annotate_file_origin");

    if (force_type == FileTypes::FEATUREXML)
    {
      FeatureMap<> out;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        FeatureMap<> map;
        FeatureXMLFile fh;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      FeatureXMLFile f;
      f.store(out_file, out);

    }
    else if (force_type == FileTypes::CONSENSUSXML)
    {
      ConsensusMap out;
      ConsensusXMLFile fh;
      fh.load(file_list[0], out);
      //skip first file
      for (Size i = 1; i < file_list.size(); ++i)
      {
        ConsensusMap map;
        ConsensusXMLFile fh;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      ConsensusXMLFile f;
      f.store(out_file, out);
    }
    else if (force_type == FileTypes::TRAML)
    {
      TargetedExperiment out;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        TargetedExperiment map;
        TraMLFile fh;
        fh.load(file_list[i], map);
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      Software software;
      software.setName("FileMerger");
      software.setVersion(VersionInfo::getVersion());
      out.addSoftware(software);

      TraMLFile f;
      f.store(out_file, out);
    }
    else
    {
      // we might want to combine different types, thus we only
      // query in_type (which applies to all files)
      // and not the suffix or content of a single file
      force_type = FileTypes::nameToType(getStringOption_("in_type"));

      //rt
      bool rt_auto_number = getFlag_("raw:rt_auto");
      bool rt_filename = getFlag_("raw:rt_filename");
      bool rt_custom = false;
      DoubleList custom_rts = getDoubleList_("raw:rt_custom");
      if (custom_rts.size() != 0)
      {
        rt_custom = true;
        if (custom_rts.size() != file_list.size())
        {
          writeLog_("Custom retention time list must have as many elements as there are input files!");
          printUsage_();
          return ILLEGAL_PARAMETERS;
        }
      }

      //ms level
      bool user_ms_level = getFlag_("raw:user_ms_level");

      MSExperiment<> out;
      out.reserve(file_list.size());
      UInt rt_auto = 0;
      UInt native_id = 0;
      std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        String filename = file_list[i];

        //load file
        MSExperiment<> in;
        fh.loadExperiment(filename, in, force_type, log_type_);
        if (in.empty() && in.getChromatograms().empty())
        {
          writeLog_(String("Warning: Empty file '") + filename + "'!");
          continue;
        }
        out.reserve(out.size() + in.size());

        //warn if custom RT and more than one scan in input file
        if (rt_custom && in.size() > 1)
        {
          writeLog_(String("Warning: More than one scan in file '") + filename + "'! All scans will have the same retention time!");
        }

        for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2)
        {
          //handle rt
          Real rt_final = it2->getRT();
          if (rt_auto_number)
          {
            rt_final = ++rt_auto;
          }
          else if (rt_custom)
          {
            rt_final = custom_rts[i];
          }
          else if (rt_filename)
          {
            if (!filename.hasSubstring("rt"))
            {
              writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'"));
            }
            for (Size i = 0; i < filename.size(); ++i)
            {
              if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i]))
              {
                String rt;
                while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i])))
                {
                  rt += filename[i++];
                }
                if (rt.size() > 0)
                {
                  // remove dot from rt3892.98.dta
                  //                          ^
                  if (rt[rt.size() - 1] == '.')
                  {
                    // remove last character
                    rt.erase(rt.end() - 1);
                  }
                }
                try
                {
                  float tmp = rt.toFloat();
                  rt_final = tmp;
                }
                catch (Exception::ConversionError)
                {
                  writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'."));
                }
              }
            }
          }

          // none of the rt methods were successful
          if (rt_final == -1)
          {
            writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'");
          }

          out.addSpectrum(*it2);
          out.getSpectra().back().setRT(rt_final);
          out.getSpectra().back().setNativeID(native_id);

          if (user_ms_level)
          {
            out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level"));
          }
          ++native_id;
        }

        // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles)
        if (in.size() == 1)
        {
          out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]);
          in.getSourceFiles().clear();   // delete source file annotated from source file (its in the spectrum anyways)
        }
        // copy experimental settings from first file
        if (i == 0)
        {
          out.ExperimentalSettings::operator=(in);
        }
        else // otherwise append
        {
          out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then
        }

        // also add the chromatograms
        for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2)
        {
          all_chromatograms.push_back(*it2);
        }

      }
      // set the chromatograms
      out.setChromatograms(all_chromatograms);

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      MzMLFile f;
      f.setLogType(log_type_);
      f.store(out_file, out);

    }

    return EXECUTION_OK;
  }
Пример #3
0
  ExitCodes main_(int, const char**)
  {

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    // file list
    StringList file_list = getStringList_("in");

    // file type
    FileHandler file_handler;
    FileTypes::Type force_type;
    if (getStringOption_("in_type").size() > 0)
    {
      force_type = FileTypes::nameToType(getStringOption_("in_type"));
    }
    else
    {
      force_type = file_handler.getType(file_list[0]);
    }

    // output file names and types
    String out_file = getStringOption_("out");

    bool annotate_file_origin =  getFlag_("annotate_file_origin");
    rt_gap_ = getDoubleOption_("rt_concat:gap");
    vector<String> trafo_out = getStringList_("rt_concat:trafo_out");
    if (trafo_out.empty())
    {
      // resize now so we don't have to worry about indexing out of bounds:
      trafo_out.resize(file_list.size());
    }
    else if (trafo_out.size() != file_list.size())
    {
      writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!");
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    if (force_type == FileTypes::FEATUREXML)
    {
      FeatureMap out;
      FeatureXMLFile fh;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        FeatureMap map;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }

        if (rt_gap_ > 0.0) // concatenate in RT
        {
          adjustRetentionTimes_(map, trafo_out[i], i == 0);
        }

        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      fh.store(out_file, out);
    }

    else if (force_type == FileTypes::CONSENSUSXML)
    {
      ConsensusMap out;
      ConsensusXMLFile fh;
      fh.load(file_list[0], out);
      // skip first file
      for (Size i = 1; i < file_list.size(); ++i)
      {
        ConsensusMap map;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }

        if (rt_gap_ > 0.0) // concatenate in RT
        {
          adjustRetentionTimes_(map, trafo_out[i], i == 0);
        }

        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      fh.store(out_file, out);
    }

    else if (force_type == FileTypes::TRAML)
    {
      TargetedExperiment out;
      TraMLFile fh;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        TargetedExperiment map;
        fh.load(file_list[i], map);
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      Software software;
      software.setName("FileMerger");
      software.setVersion(VersionInfo::getVersion());
      out.addSoftware(software);

      fh.store(out_file, out);
    }
    else // raw data input (e.g. mzML)
    {
      // RT
      bool rt_auto_number = getFlag_("raw:rt_auto");
      bool rt_filename = getFlag_("raw:rt_filename");
      bool rt_custom = false;
      DoubleList custom_rts = getDoubleList_("raw:rt_custom");
      if (!custom_rts.empty())
      {
        rt_custom = true;
        if (custom_rts.size() != file_list.size())
        {
          writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!");
          return ILLEGAL_PARAMETERS;
        }
      }

      // MS level
      Int ms_level = getIntOption_("raw:ms_level");

      MSExperiment<> out;
      UInt rt_auto = 0;
      UInt native_id = 0;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        String filename = file_list[i];

        // load file
        force_type = file_handler.getType(file_list[i]);
        MSExperiment<> in;
        file_handler.loadExperiment(filename, in, force_type, log_type_);

        if (in.empty() && in.getChromatograms().empty())
        {
          writeLog_(String("Warning: Empty file '") + filename + "'!");
          continue;
        }
        out.reserve(out.size() + in.size());

        // warn if custom RT and more than one scan in input file
        if (rt_custom && in.size() > 1)
        {
          writeLog_(String("Warning: More than one scan in file '") + filename + "'! All scans will have the same retention time!");
        }

        // handle special raw data options:
        for (MSExperiment<>::iterator spec_it = in.begin();
             spec_it != in.end(); ++spec_it)
        {
          float rt_final = spec_it->getRT();
          if (rt_auto_number)
          {
            rt_final = ++rt_auto;
          }
          else if (rt_custom)
          {
            rt_final = custom_rts[i];
          }
          else if (rt_filename)
          {
            static const boost::regex re("rt(\\d+(\\.\\d+)?)");
            boost::smatch match;
            bool found = boost::regex_search(filename, match, re);
            if (found)
            {
              rt_final = String(match[1]).toFloat();
            }
            else
            {
              writeLog_("Warning: could not extract retention time from filename '" + filename + "'");
            }
          }

          // none of the rt methods were successful
          if (rt_final < 0)
          {
            writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'");
          }

          spec_it->setRT(rt_final);
          spec_it->setNativeID("spectrum=" + String(native_id));
          if (ms_level > 0)
          {
            spec_it->setMSLevel(ms_level);
          }
          ++native_id;
        }

        // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles)
        if (in.size() == 1)
        {
          in[0].setSourceFile(in.getSourceFiles()[0]);
          in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways)
        }

        if (rt_gap_ > 0.0) // concatenate in RT
        {
          adjustRetentionTimes_(in, trafo_out[i], i == 0);
        }

        // add spectra to output
        for (MSExperiment<>::const_iterator spec_it = in.begin();
             spec_it != in.end(); ++spec_it)
        {
          out.addSpectrum(*spec_it);
        }
        // also add the chromatograms
        for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator
               chrom_it = in.getChromatograms().begin(); chrom_it != 
               in.getChromatograms().end(); ++chrom_it)
        {
          out.addChromatogram(*chrom_it);
        }

        // copy experimental settings from first file
        if (i == 0)
        {
          out.ExperimentalSettings::operator=(in);
        }
        else // otherwise append
        {
          out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then
        }
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      MzMLFile f;
      f.setLogType(log_type_);
      f.store(out_file, out);
    }

    return EXECUTION_OK;
  }