C++ (Cpp) MSExperiment::begin Examples

Programming Language: C++ (Cpp)

Class/Type: MSExperiment

Method/Function: begin

Examples at hotexamples.com: 19

C++ (Cpp) MSExperiment::begin - 19 examples found. These are the top rated real world C++ (Cpp) examples of MSExperiment::begin extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

begin(19)

end(16)

empty(6)

addSpectrum(5)

clearMetaDataArrays(2)

clearRanges(2)

findNearest(2)

getChromatogram(2)

MZBegin(1)

MZEnd(1)

addChromatogram(1)

areaBegin(1)

areaEnd(1)

clear(1)

Example #1

Show file

File: MapAlignmentTransformer.cpp Project: chahuistle/OpenMS

  void MapAlignmentTransformer::transformRetentionTimes(
    MSExperiment<>& msexp, const TransformationDescription& trafo,
    bool store_original_rt)
  {
    msexp.clearRanges();

    // Transform spectra
    for (MSExperiment<>::iterator mse_iter = msexp.begin();
         mse_iter != msexp.end(); ++mse_iter)
    {
      double rt = mse_iter->getRT();
      if (store_original_rt) storeOriginalRT_(*mse_iter, rt);
      mse_iter->setRT(trafo.apply(rt));
    }

    // Also transform chromatograms
    for (Size i = 0; i < msexp.getNrChromatograms(); ++i)
    {
      MSChromatogram<ChromatogramPeak>& chromatogram = msexp.getChromatogram(i);
      vector<double> original_rts;
      if (store_original_rt) original_rts.reserve(chromatogram.size());
      for (Size j = 0; j < chromatogram.size(); j++)
      {
        double rt = chromatogram[j].getRT();
        if (store_original_rt) original_rts.push_back(rt);
        chromatogram[j].setRT(trafo.apply(rt));
      }
      if (store_original_rt && !chromatogram.metaValueExists("original_rt"))
      {
        chromatogram.setMetaValue("original_rt", original_rts);
      }
    }

    msexp.updateRanges();
  }

Example #2

Show file

File: MapAlignmentTransformer.C Project: BioITer/OpenMS

  void MapAlignmentTransformer::transformSinglePeakMap(MSExperiment<> & msexp,
                                                       const TransformationDescription & trafo)
  {
    msexp.clearRanges();

    // Transform spectra
    for (MSExperiment<>::iterator mse_iter = msexp.begin(); mse_iter != msexp.end(); ++mse_iter)
    {
      DoubleReal rt = mse_iter->getRT();
      mse_iter->setRT(trafo.apply(rt));
    }

    // Also transform chromatograms
    DoubleReal rt;
    std::vector<MSChromatogram<ChromatogramPeak> > chromatograms;
    for (Size i = 0; i < msexp.getChromatograms().size(); i++)
    {
      MSChromatogram<ChromatogramPeak> chromatogram = msexp.getChromatograms()[i];
      for (Size j = 0; j < chromatogram.size(); j++)
      {
        rt = chromatogram[j].getRT();
        chromatogram[j].setRT(trafo.apply(rt));
      }
      chromatograms.push_back(chromatogram);
    }
    msexp.setChromatograms(chromatograms);

    msexp.updateRanges();
  }

Example #3

Show file

File: TOFCalibration.cpp Project: chahuistle/OpenMS

  void TOFCalibration::matchMasses_(MSExperiment<> & calib_peaks,
                                    std::vector<std::vector<unsigned int> > & monoiso_peaks,
                                    std::vector<unsigned int> & obs_masses,
                                    std::vector<double> & exp_masses, unsigned int idx)
  {
    for (unsigned int i = 0; i < monoiso_peaks[idx].size(); ++i)
    {
      for (unsigned int j = 0; j < exp_masses_.size(); ++j)
      {
        if (fabs(((calib_peaks.begin() + idx)->begin() + (monoiso_peaks[idx])[i])->getMZ() - exp_masses_[j]) < 1)
        {
          obs_masses.push_back((monoiso_peaks[idx])[i]);
          exp_masses.push_back(exp_masses_[j]);
          break;
        }
      }
    }
#ifdef DEBUG_CALIBRATION

    std::cout << "\n\n---------\nmatching monoisotopic peaks\n";

    for (unsigned int i = 0; i < obs_masses.size(); ++i)
    {
      std::cout << ((calib_peaks_ft_.begin() + idx)->begin() + obs_masses[i])->getMZ()
                << "\t" << exp_masses[i]
                << std::endl;

    }

#endif
  }

Example #4

Show file

File: MapAlignmentAlgorithmIdentification.C Project: aiche/open-ms-mirror

 // lists of peptide hits in "maps" will be sorted
 void MapAlignmentAlgorithmIdentification::getRetentionTimes_(
   MSExperiment<> & experiment, SeqToList & rt_data)
 {
   for (MSExperiment<>::Iterator exp_it = experiment.begin();
        exp_it != experiment.end(); ++exp_it)
   {
     getRetentionTimes_(exp_it->getPeptideIdentifications(), rt_data);
   }
   // duplicates should not be possible -> no need to remove them
 }

Example #5

Show file

File: Tutorial_MSExperiment.cpp Project: OpenMS/OpenMS

int main()
{

  // create a peak map containing 4 dummy spectra and peaks
  MSExperiment exp;

  // The following examples creates a MSExperiment containing four MSSpectrum instances.
  for (Size i = 0; i < 4; ++i)
  {
    MSSpectrum spectrum;
    spectrum.setRT(i);
    spectrum.setMSLevel(1);
    for (float mz = 500.0; mz <= 900; mz += 100.0)
    {
      Peak1D peak;
      peak.setMZ(mz + i);
      spectrum.push_back(peak);
    }
    
    exp.addSpectrum(spectrum);
  }

  // Iteration over the RT range (2,3) and the m/z range (603,802) and print the peak positions.
  for (auto it = exp.areaBegin(2.0, 3.0, 603.0, 802.0); it != exp.areaEnd(); ++it)
  {
    cout << it.getRT() << " - " << it->getMZ() << endl;
  }

  // Iteration over all peaks in the experiment. 
  // Output: RT, m/z, and intensity
  // Note that the retention time is stored in the spectrum (not in the peak object)
  for (auto s_it = exp.begin(); s_it != exp.end(); ++s_it)
  {
    for (auto p_it = s_it->begin(); p_it != s_it->end(); ++p_it)
    {
      cout << s_it->getRT() << " - " << p_it->getMZ() << " " << p_it->getIntensity() << endl;
    }
  }

  // We could store the spectra to a mzML file with:
  // MzMLFile mzml;
  // mzml.store(filename, exp);
  
  // And load it with
  // mzml.load(filename, exp);
  // If we wanted to load only the MS2 spectra we could speed up reading by setting:
  // mzml.getOptions().addMSLevel(2);
  // before executing: mzml.load(filename, exp);

  return 0;
} //end of main

Example #6

Show file

File: SeedListGenerator.cpp Project: BioinformaticsArchive/OpenMS

 void SeedListGenerator::generateSeedList(const MSExperiment<>& experiment,
                                          SeedList& seeds)
 {
   seeds.clear();
   for (MSExperiment<>::ConstIterator exp_it = experiment.begin();
        exp_it != experiment.end(); ++exp_it)
   {
     if (exp_it->getMSLevel() == 2) // MS2 spectrum -> look for precursor
     {
       MSExperiment<>::ConstIterator prec_it =
         experiment.getPrecursorSpectrum(exp_it);
       const vector<Precursor>& precursors = exp_it->getPrecursors();
       DPosition<2> point(prec_it->getRT(), precursors[0].getMZ());
       seeds.push_back(point);
     }
   }
 }

Example #7

Show file

File: Tutorial_Precursor.cpp Project: grosenberger/OpenMS

int main(int argc, const char** argv)
{
  
  if (argc < 2) return 1;
  
  // the path to the data should be given on the command line
  String tutorial_data_path(argv[1]);
  
  MSExperiment spectra;
  MzMLFile f;

  // load mzML from code examples folder
  f.load(tutorial_data_path + "/data/Tutorial_GaussFilter.mzML", spectra);

  // iterate over map and output MS2 precursor information
  for (auto s_it = spectra.begin(); s_it != spectra.end(); ++s_it)
  {
    // we are only interested in MS2 spectra so we skip all other levels
    if (s_it->getMSLevel() != 2) continue;

    // get a reference to the precursor information
    const MSSpectrum& spectrum = *s_it;
    const vector<Precursor>& precursors = spectrum.getPrecursors();

    // size check & throw exception if needed 
    if (precursors.empty()) throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, precursors.size());

    // get m/z and intensity of precursor
    double precursor_mz = precursors[0].getMZ();
    float precursor_int = precursors[0].getIntensity();
  
    // retrieve the precursor spectrum (the most recent MS1 spectrum)
    PeakMap::ConstIterator precursor_spectrum = spectra.getPrecursorSpectrum(s_it);
    double precursor_rt = precursor_spectrum->getRT();
  
    // output precursor information
    std::cout << " precusor m/z: " << precursor_mz
              << " intensity: " << precursor_int
              << " retention time (sec.): " << precursor_rt 
              << std::endl;
   }
                                                            
  return 0;
} // end of main

Example #8

Show file

File: ImageCreator.cpp Project: BioinformaticsArchive/OpenMS

 void markMS2Locations_(MSExperiment<> & exp, QImage & image, bool transpose,
                        QColor color, Size size)
 {
   double xcoef = image.width(), ycoef = image.height();
   if (transpose)
   {
     xcoef /= exp.getMaxRT() - exp.getMinRT();
     ycoef /= exp.getMaxMZ() - exp.getMinMZ();
   }
   else
   {
     xcoef /= exp.getMaxMZ() - exp.getMinMZ();
     ycoef /= exp.getMaxRT() - exp.getMinRT();
   }
   for (MSExperiment<>::Iterator spec_iter = exp.begin();
        spec_iter != exp.end(); ++spec_iter)
   {
     if (spec_iter->getMSLevel() == 2)
     {
       double mz = spec_iter->getPrecursors()[0].getMZ();
       double rt = exp.getPrecursorSpectrum(spec_iter)->getRT();
       int x, y;
       if (transpose)
       {
         x = int(xcoef * (rt - exp.getMinRT()));
         y = int(ycoef * (exp.getMaxMZ() - mz));
       }
       else
       {
         x = int(xcoef * (mz - exp.getMinMZ()));
         y = int(ycoef * (exp.getMaxRT() - rt));
       }
       addPoint_(x, y, image, color, size);  //mark MS2
     }
   }
 }

Example #9

Show file

File: TOFCalibration.cpp Project: chahuistle/OpenMS

  void TOFCalibration::applyTOFConversion_(MSExperiment<> & calib_spectra)
  {
    MSExperiment<>::iterator spec_iter = calib_spectra.begin();
    MSExperiment<>::SpectrumType::iterator peak_iter;
    unsigned int idx = 0;

    //two point conversion
    if (ml3s_.empty())
    {
      for (; spec_iter != calib_spectra.end(); ++spec_iter)
      {
        peak_iter = spec_iter->begin();
        double ml1, ml2;
        if (ml1s_.size() == 1)
        {
          ml1 = ml1s_[0];
          ml2 = ml2s_[0];
        }
        else
        {
          ml1 = ml1s_[idx];
          ml2 = ml2s_[idx];
        }

        // go through current scan
        for (; peak_iter != spec_iter->end(); ++peak_iter)
        {
          double time = peak_iter->getMZ();
          peak_iter->setPos(ml1 / 1E12 * (time * 1000 - ml2));
        }
        ++idx;
      }
    }
    else
    {
      // three point conversion
      for (; spec_iter != calib_spectra.end(); ++spec_iter)
      {
        peak_iter = spec_iter->begin();
        double ml1, ml2, ml3;
        if (ml1s_.size() == 1)
        {
          ml1 = ml1s_[0];
          ml2 = ml2s_[0];
          ml3 = ml3s_[0];
        }
        else
        {
          ml1 = ml1s_[idx];
          ml2 = ml2s_[idx];
          ml3 = ml3s_[idx];
        }

        // go through current scan
        for (; peak_iter != spec_iter->end(); ++peak_iter)
        {
          double time = peak_iter->getMZ();
          peak_iter->setPos((-ml2 - (0.1E7 * (-5E5 + sqrt(0.25E12 - ml1 * ml2 * ml3 + ml1 * ml3 * time))) / (ml1 * ml3) + time) / ml3);
        }
        ++idx;
      }
    }

  }

Example #10

Show file

File: TOFCalibration.cpp Project: chahuistle/OpenMS

  void TOFCalibration::getMonoisotopicPeaks_(MSExperiment<> & calib_peaks, std::vector<std::vector<unsigned int> > & monoiso_peaks)
  {

    MSExperiment<>::iterator spec_iter = calib_peaks.begin();
    MSExperiment<>::SpectrumType::iterator peak_iter, help_iter;
#ifdef DEBUG_CALIBRATION
    spec_iter = calib_peaks.begin();
    std::cout << "\n\nbefore---------\n\n";
    // iterate through all spectra
    for (; spec_iter != calib_peaks.end(); ++spec_iter)
    {
      peak_iter = spec_iter->begin();
      // go through current scan
      for (; peak_iter != spec_iter->end(); ++peak_iter)
      {
        std::cout << peak_iter->getMZ() << std::endl;
      }
    }

#endif
    spec_iter = calib_peaks.begin();
    // iterate through all spectra
    for (; spec_iter != calib_peaks.end(); ++spec_iter)
    {
      peak_iter = spec_iter->begin();
      help_iter = peak_iter;
      std::vector<unsigned int> vec;
      // go through current scan
      while (peak_iter < spec_iter->end())
      {
        while (peak_iter + 1 < spec_iter->end() && ((peak_iter + 1)->getMZ() - peak_iter->getMZ() < 1.2))
        {
          ++peak_iter;
        }

        vec.push_back(distance(spec_iter->begin(), help_iter));

        help_iter = peak_iter + 1;
        ++peak_iter;

      }
      monoiso_peaks.push_back(vec);

    }

#ifdef DEBUG_CALIBRATION


    std::cout << "\n\nafter---------\n\n";

    for (unsigned int i = 0; i < monoiso_peaks.size(); ++i)
    {
      for (unsigned int j = 0; j < monoiso_peaks[i].size(); ++j)
      {
        std::cout << i << "\t" << ((calib_peaks.begin() + i)->begin() + (monoiso_peaks[i])[j])->getMZ() << std::endl;
      }
      std::cout << "--------------\n";

    }
    std::cout << "--------------\n\n\n";
#endif
  }

Example #11

Show file

File: ImageCreator.cpp Project: BioinformaticsArchive/OpenMS

  ExitCodes main_(int, const char **)
  {
    //----------------------------------------------------------------
    // load data
    //----------------------------------------------------------------
    String in = getStringOption_("in");
    String in_featureXML = getStringOption_("in_featureXML");
    String out = getStringOption_("out");
    String format = getStringOption_("out_type");
    if (format.trim() == "") // get from filename
    {
      try
      {
        format = out.suffix('.');
      }
      catch (Exception::ElementNotFound & /*e*/)
      {
        format = "nosuffix";
      }
      StringListUtils::toUpper(out_formats_);
      if (!ListUtils::contains(out_formats_, format.toUpper()))
      {
        LOG_ERROR << "No explicit image output format was provided via 'out_type', and the suffix ('" << format << "') does not resemble a valid type. Please fix one of them." << std::endl;
        return ILLEGAL_PARAMETERS;
      }
    }
    MSExperiment<> exp;
    MzMLFile f;
    f.setLogType(log_type_);
    f.load(in, exp);

    exp.updateRanges(1);

    SignedSize rows = getIntOption_("height");
    if (rows == 0)
    {
      rows = exp.size();
    }
    if (rows <= 0)
    {
      writeLog_("Error: Zero rows is not possible.");
      return ILLEGAL_PARAMETERS;
    }

    SignedSize cols = getIntOption_("width");
    if (cols == 0)
    {
      cols = UInt(ceil(exp.getMaxMZ() - exp.getMinMZ()));
    }
    if (cols <= 0)
    {
      writeLog_("Error: Zero columns is not possible.");
      return ILLEGAL_PARAMETERS;
    }

    //----------------------------------------------------------------
    //Do the actual resampling
    BilinearInterpolation<double, double> bilip;
    bilip.getData().resize(rows, cols);

    if (!getFlag_("transpose"))
    {
      // scans run bottom-up:
      bilip.setMapping_0(0, exp.getMaxRT(), rows - 1, exp.getMinRT());
      // peaks run left-right:
      bilip.setMapping_1(0, exp.getMinMZ(), cols - 1, exp.getMaxMZ());

      for (MSExperiment<>::Iterator spec_iter = exp.begin();
           spec_iter != exp.end(); ++spec_iter)
      {
        if (spec_iter->getMSLevel() != 1) continue;
        for (MSExperiment<>::SpectrumType::ConstIterator peak1_iter =
               spec_iter->begin(); peak1_iter != spec_iter->end();
             ++peak1_iter)
        {
          bilip.addValue(spec_iter->getRT(), peak1_iter->getMZ(),
                         peak1_iter->getIntensity());
        }
      }
    }
    else     // transpose
    {
      // spectra run bottom-up:
      bilip.setMapping_0(0, exp.getMaxMZ(), rows - 1, exp.getMinMZ());
      // scans run left-right:
      bilip.setMapping_1(0, exp.getMinRT(), cols - 1, exp.getMaxRT());

      for (MSExperiment<>::Iterator spec_iter = exp.begin();
           spec_iter != exp.end(); ++spec_iter)
      {
        if (spec_iter->getMSLevel() != 1) continue;
        for (MSExperiment<>::SpectrumType::ConstIterator peak1_iter =
               spec_iter->begin(); peak1_iter != spec_iter->end();
             ++peak1_iter)
        {
          bilip.addValue(peak1_iter->getMZ(), spec_iter->getRT(),
                         peak1_iter->getIntensity());
        }
      }
    }

    //----------------------------------------------------------------
    //create and store image
    int scans = (int) bilip.getData().sizePair().first;
    int peaks = (int) bilip.getData().sizePair().second;

    MultiGradient gradient;
    String gradient_str = getStringOption_("gradient");
    if (gradient_str != "")
    {
      gradient.fromString(String("Linear|") + gradient_str);
    }
    else
    {
      gradient.fromString("Linear|0,#FFFFFF;2,#FFFF00;11,#FFAA00;32,#FF0000;55,#AA00FF;78,#5500FF;100,#000000");
    }

    bool use_log = getFlag_("log_intensity");
    writeDebug_("log_intensity: " + String(use_log), 1);

    QImage image(peaks, scans, QImage::Format_RGB32);
    string s = getStringOption_("background_color");
    QColor background_color(s.c_str());

    string feature_color_string = getStringOption_("feature_color");
    QColor feature_color(feature_color_string.c_str());

    QPainter * painter = new QPainter(&image);
    painter->setPen(background_color);
    painter->fillRect(0, 0, peaks, scans, Qt::SolidPattern);
    delete painter;

    double factor = getDoubleOption_("max_intensity");
    if (factor == 0)
    {
      factor = (*std::max_element(bilip.getData().begin(), bilip.getData().end()));
    }
    // logarithmize max. intensity as well:
    if (use_log) factor = std::log(factor);

    factor /= 100.0;
    for (int i = 0; i < scans; ++i)
    {
      for (int j = 0; j < peaks; ++j)
      {
        double value = bilip.getData().getValue(i, j);
        if (use_log) value = std::log(value);
        if (value > 1e-4)
        {
          image.setPixel(j, i, gradient.interpolatedColorAt(value / factor).rgb());
        }
        else
        {
          image.setPixel(j, i, background_color.rgb());
        }
      }
    }

    if (getFlag_("precursors"))
    {
      markMS2Locations_(exp, image, getFlag_("transpose"),
                        getStringOption_("precursor_color").toQString(),
                        Size(getIntOption_("precursor_size")));
    }

    if (!in_featureXML.empty())
    {
      FeatureMap feature_map;
      FeatureXMLFile ff;
      ff.load(in_featureXML, feature_map);
      markFeatureLocations_(feature_map, exp, image, getFlag_("transpose"), feature_color);
    }

    if (image.save(out.toQString(), format.c_str())) return EXECUTION_OK;
    else return CANNOT_WRITE_OUTPUT_FILE;
  }

Example #12

Show file

File: SILACAnalyzer.C Project: aiche/open-ms-mirror

  ExitCodes main_(int, const char **)
  {
    // data to be passed through the algorithm
    vector<vector<SILACPattern> > data;
    MSQuantifications msq;
    vector<Clustering *> cluster_data;

    // 
    // Parameter handling
    // 
    map<String, DoubleReal> label_identifiers;   // list defining the mass shifts of each label (e.g. "Arg6" => 6.0201290268)
    handleParameters_sample();
    handleParameters_algorithm();
    handleParameters_labels(label_identifiers);
    handleParameters();

    if (selected_labels.empty() && !out.empty()) // incompatible parameters
    {
      writeLog_("Error: The 'out' parameter cannot be used without a label (parameter 'sample:labels'). Use 'out_features' instead.");
      return ILLEGAL_PARAMETERS;
    }

    // 
    // Initializing the SILACAnalzer with our parameters
    // 
    SILACAnalyzer analyzer;
    analyzer.setLogType(log_type_);
    analyzer.initialize(
      // section "sample"
      selected_labels,
      charge_min,
      charge_max,
      missed_cleavages,
      isotopes_per_peptide_min,
      isotopes_per_peptide_max,
      // section "algorithm"
      rt_threshold,
      rt_min,
      intensity_cutoff,
      intensity_correlation,
      model_deviation,
      allow_missing_peaks,
      // labels
      label_identifiers);


    //--------------------------------------------------
    // loading input from .mzML
    //--------------------------------------------------

    MzMLFile file;
    MSExperiment<Peak1D> exp;

    // only read MS1 spectra ...
    /*
    std::vector<int> levels;
    levels.push_back(1);
    file.getOptions().setMSLevels(levels);
    */
    LOG_DEBUG << "Loading input..." << endl;
    file.setLogType(log_type_);
    file.load(in, exp);

    // set size of input map
    exp.updateRanges();

    // extract level 1 spectra
    exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<MSExperiment<Peak1D>::SpectrumType>(IntList::create("1"), true)), exp.end());

    // sort according to RT and MZ
    exp.sortSpectra();

    if (out_mzq != "")
    {
      vector<vector<String> > SILAClabels = analyzer.getSILAClabels(); // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"

      std::vector<std::vector<std::pair<String, DoubleReal> > > labels;
      //add none label
      labels.push_back(std::vector<std::pair<String, DoubleReal> >(1, std::make_pair<String, DoubleReal>(String("none"), DoubleReal(0))));
      for (Size i = 0; i < SILAClabels.size(); ++i)       //SILACLabels MUST be in weight order!!!
      {
        std::vector<std::pair<String, DoubleReal> > one_label;
        for (UInt j = 0; j < SILAClabels[i].size(); ++j)
        {
          one_label.push_back(*(label_identifiers.find(SILAClabels[i][j])));              // this dereferencing would break if all SILAClabels would not have been checked before!
        }
        labels.push_back(one_label);
      }
      msq.registerExperiment(exp, labels);       //add assays
      msq.assignUIDs();
    }
    MSQuantifications::QUANT_TYPES quant_type = MSQuantifications::MS1LABEL;
    msq.setAnalysisSummaryQuantType(quant_type);    //add analysis_summary_

    //--------------------------------------------------
    // estimate peak width
    //--------------------------------------------------

    LOG_DEBUG << "Estimating peak width..." << endl;
    PeakWidthEstimator::Result peak_width;
    try
    {
      peak_width = analyzer.estimatePeakWidth(exp);
    }
    catch (Exception::InvalidSize &)
    {
      writeLog_("Error: Unable to estimate peak width of input data.");
      return INCOMPATIBLE_INPUT_DATA;
    }


    if (in_filters == "")
    {
      //--------------------------------------------------
      // filter input data
      //--------------------------------------------------

      LOG_DEBUG << "Filtering input data..." << endl;
      analyzer.filterData(exp, peak_width, data); 

      //--------------------------------------------------
      // store filter results
      //--------------------------------------------------

      if (out_filters != "")
      {
        LOG_DEBUG << "Storing filtering results..." << endl;
        ConsensusMap map;
        for (std::vector<std::vector<SILACPattern> >::const_iterator it = data.begin(); it != data.end(); ++it)
        {
          analyzer.generateFilterConsensusByPattern(map, *it);
        }
        analyzer.writeConsensus(out_filters, map);
      }
    }
    else
    {
      //--------------------------------------------------
      // load filter results
      //--------------------------------------------------

      LOG_DEBUG << "Loading filtering results..." << endl;
      ConsensusMap map;
      analyzer.readConsensus(in_filters, map);
      analyzer.readFilterConsensusByPattern(map, data);
    }

    //--------------------------------------------------
    // clustering
    //--------------------------------------------------

    LOG_DEBUG << "Clustering data..." << endl;
    analyzer.clusterData(exp, peak_width, cluster_data, data);

    //--------------------------------------------------------------
    // write output
    //--------------------------------------------------------------

    if (out_debug != "")
    {
      LOG_DEBUG << "Writing debug output file..." << endl;
      std::ofstream out((out_debug + ".clusters.csv").c_str());

      vector<vector<DoubleReal> > massShifts = analyzer.getMassShifts(); // list of mass shifts

      // generate header
      out
      << std::fixed << std::setprecision(8)
      << "ID,RT,MZ_PEAK,CHARGE";
      for (UInt i = 1; i <= massShifts[0].size(); ++i)
      {
        out << ",DELTA_MASS_" << i + 1;
      }
      for (UInt i = 0; i <= massShifts[0].size(); ++i)
      {
        for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
        {
          out << ",INT_PEAK_" << i + 1 << '_' << j;
        }
      }
      out << ",MZ_RAW";
      for (UInt i = 0; i <= massShifts[0].size(); ++i)
      {
        for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
        {
          out << ",INT_RAW_" << i + 1 << '_' << j;
        }
      }
      for (UInt i = 0; i <= massShifts[0].size(); ++i)
      {
        for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
        {
          out << ",MZ_RAW_" << i + 1 << '_' << j;
        }
      }
      out << '\n';

      // write data
      UInt cluster_id = 0;
      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        analyzer.generateClusterDebug(out, **it, cluster_id);
      }
    }

    if (out != "")
    {
      LOG_DEBUG << "Generating output consensus map..." << endl;
      ConsensusMap map;

      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        analyzer.generateClusterConsensusByCluster(map, **it);
      }

      LOG_DEBUG << "Adding meta data..." << endl;
      // XXX: Need a map per mass shift
      ConsensusMap::FileDescriptions& desc = map.getFileDescriptions();
      Size id = 0;
      for (ConsensusMap::FileDescriptions::iterator it = desc.begin(); it != desc.end(); ++it)
      {
        if (test_mode_) it->second.filename = in; // skip path, since its not cross platform and complicates verification
        else it->second.filename = File::basename(in);
        // Write correct label
        // (this would crash if used without a label!)
        if (id > 0) it->second.label = StringList(analyzer.getSILAClabels()[id - 1]).concatenate(""); // skip first round (empty label is not listed)
        ++id;
      }

      std::set<DataProcessing::ProcessingAction> actions;
      actions.insert(DataProcessing::DATA_PROCESSING);
      actions.insert(DataProcessing::PEAK_PICKING);
      actions.insert(DataProcessing::FILTERING);
      actions.insert(DataProcessing::QUANTITATION);

      addDataProcessing_(map, getProcessingInfo_(actions));

      analyzer.writeConsensus(out, map);
      if (out_mzq != "")
      {
        LOG_DEBUG << "Generating output mzQuantML file..." << endl;
        ConsensusMap numap(map);
        //calc. ratios
        for (ConsensusMap::iterator cit = numap.begin(); cit != numap.end(); ++cit)
        {
          //~ make ratio templates
          std::vector<ConsensusFeature::Ratio> rts;
          for (std::vector<MSQuantifications::Assay>::const_iterator ait = msq.getAssays().begin() + 1; ait != msq.getAssays().end(); ++ait)
          {
            ConsensusFeature::Ratio r;
            r.numerator_ref_ = String(msq.getAssays().begin()->uid_);
            r.denominator_ref_ = String(ait->uid_);
            r.description_.push_back("Simple ratio calc");
            r.description_.push_back("light to medium/.../heavy");
            //~ "<cvParam cvRef=\"PSI-MS\" accession=\"MS:1001132\" name=\"peptide ratio\"/>"
            rts.push_back(r);
          }

          const ConsensusFeature::HandleSetType& feature_handles = cit->getFeatures();
          if (feature_handles.size() > 1)
          {
            std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fit = feature_handles.begin();             // this is unlabeled
            fit++;
            for (; fit != feature_handles.end(); ++fit)
            {
              Size ri = std::distance(feature_handles.begin(), fit);
              rts[ri - 1].ratio_value_ =  feature_handles.begin()->getIntensity() / fit->getIntensity();             // a proper silacalanyzer algo should never have 0-intensities so no 0devison ...
            }
          }

          cit->setRatios(rts);
        }
        msq.addConsensusMap(numap);        //add SILACAnalyzer result

        //~ msq.addFeatureMap();//add SILACAnalyzer evidencetrail as soon as clear what is realy contained in the featuremap
        //~ add AuditCollection - no such concept in TOPPTools yet
        analyzer.writeMzQuantML(out_mzq, msq);
      }
    }

    if (out_clusters != "")
    {
      LOG_DEBUG << "Generating cluster output file..." << endl;
      ConsensusMap map;
      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        UInt cluster_id = 0;
        analyzer.generateClusterConsensusByPattern(map, **it, cluster_id);
      }

      ConsensusMap::FileDescription & desc = map.getFileDescriptions()[0];
      desc.filename = in;
      desc.label = "Cluster";

      analyzer.writeConsensus(out_clusters, map);
    }

    if (out_features != "")
    {
      LOG_DEBUG << "Generating output feature map..." << endl;
      FeatureMap<> map;
      for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
      {
        analyzer.generateClusterFeatureByCluster(map, **it);
      }

      analyzer.writeFeatures(out_features, map);
    }

    return EXECUTION_OK;
  }

Example #13

Show file

File: IDSplitter.cpp Project: chahuistle/OpenMS

  ExitCodes main_(int, const char **)
  {
    String in = getStringOption_("in"), out = getStringOption_("out"),
           id_out = getStringOption_("id_out");

    if (out.empty() && id_out.empty())
    {
      throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__,
                                                 __PRETTY_FUNCTION__,
                                                 "out/id_out");
    }

    vector<ProteinIdentification> proteins;
    vector<PeptideIdentification> peptides;

    FileTypes::Type in_type = FileHandler::getType(in);

    if (in_type == FileTypes::MZML)
    {
      MSExperiment<> experiment;
      MzMLFile().load(in, experiment);
      // what about unassigned peptide IDs?
      for (MSExperiment<>::Iterator exp_it = experiment.begin();
           exp_it != experiment.end(); ++exp_it)
      {
        peptides.insert(peptides.end(),
                        exp_it->getPeptideIdentifications().begin(),
                        exp_it->getPeptideIdentifications().end());
        exp_it->getPeptideIdentifications().clear();
      }
      experiment.getProteinIdentifications().swap(proteins);
      if (!out.empty())
      {
        addDataProcessing_(experiment,
                           getProcessingInfo_(DataProcessing::FILTERING));
        MzMLFile().store(out, experiment);
      }
    }
    else if (in_type == FileTypes::FEATUREXML)
    {
      FeatureMap features;
      FeatureXMLFile().load(in, features);
      features.getUnassignedPeptideIdentifications().swap(peptides);
      for (FeatureMap::Iterator feat_it = features.begin();
           feat_it != features.end(); ++feat_it)
      {
        peptides.insert(peptides.end(),
                        feat_it->getPeptideIdentifications().begin(),
                        feat_it->getPeptideIdentifications().end());
        feat_it->getPeptideIdentifications().clear();
      }
      features.getProteinIdentifications().swap(proteins);
      if (!out.empty())
      {
        addDataProcessing_(features,
                           getProcessingInfo_(DataProcessing::FILTERING));
        FeatureXMLFile().store(out, features);
      }
    }
    else         // consensusXML
    {
      ConsensusMap consensus;
      ConsensusXMLFile().load(in, consensus);
      consensus.getUnassignedPeptideIdentifications().swap(peptides);
      for (ConsensusMap::Iterator cons_it = consensus.begin();
           cons_it != consensus.end(); ++cons_it)
      {
        peptides.insert(peptides.end(),
                        cons_it->getPeptideIdentifications().begin(),
                        cons_it->getPeptideIdentifications().end());
        cons_it->getPeptideIdentifications().clear();
      }
      consensus.getProteinIdentifications().swap(proteins);
      if (!out.empty())
      {
        addDataProcessing_(consensus,
                           getProcessingInfo_(DataProcessing::FILTERING));
        ConsensusXMLFile().store(out, consensus);
      }
    }

    if (!id_out.empty())
    {
      // IDMapper can match a peptide ID to several overlapping features,
      // resulting in duplicates; this shouldn't be the case for peak data
      if (in_type != FileTypes::MZML) removeDuplicates_(peptides);
      IdXMLFile().store(id_out, proteins, peptides);
    }

    return EXECUTION_OK;
  }

Example #14

Show file

File: IsobaricChannelExtractor.C Project: aiche/open-ms-mirror

  void IsobaricChannelExtractor::extractChannels(const MSExperiment<Peak1D>& ms_exp_data, ConsensusMap& consensus_map)
  {
    if (ms_exp_data.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.\n";
      throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Experiment has no scans!");
    }

    // clear the output map
    consensus_map.clear(false);
    consensus_map.setExperimentType("labeled_MS2");

    // create predicate for spectrum checking
    LOG_INFO << "Selecting scans with activation mode: " << (selected_activation_ == "" ? "any" : selected_activation_) << "\n";
    HasActivationMethod<MSExperiment<Peak1D>::SpectrumType> activation_predicate(StringList::create(selected_activation_));

    // now we have picked data
    // --> assign peaks to channels
    UInt64 element_index(0);

    // remember the current precusor spectrum
    MSExperiment<Peak1D>::ConstIterator prec_spec = ms_exp_data.end();

    for (MSExperiment<Peak1D>::ConstIterator it = ms_exp_data.begin(); it != ms_exp_data.end(); ++it)
    {
      // remember the last MS1 spectra as we assume it to be the precursor spectrum
      if (it->getMSLevel() ==  1) prec_spec = it;

      if (selected_activation_ == "" || activation_predicate(*it))
      {
        // check if precursor is available
        if (it->getPrecursors().empty())
        {
          throw Exception::MissingInformation(__FILE__, __LINE__, __PRETTY_FUNCTION__, String("No precursor information given for scan native ID ") + it->getNativeID() + " with RT " + String(it->getRT()));
        }

        // check precursor constraints
        if (!isValidPrecursor_(it->getPrecursors()[0]))
        {
          LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor doesn't fulfill all constraints." << std::endl;
          continue;
        }

        // check precursor purity if we have a valid precursor ..
        if (prec_spec != ms_exp_data.end())
        {
          const DoubleReal purity = computePrecursorPurity_(it, prec_spec);
          if (purity < min_precursor_purity_)
          {
            LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold. [purity = " << purity << "]" << std::endl;
            continue;
          }
        }
        else
        {
          LOG_INFO << "No precursor available for spectrum: " << it->getNativeID() << std::endl;
        }
        if (!(prec_spec == ms_exp_data.end()) && computePrecursorPurity_(it, prec_spec) < min_precursor_purity_)
        {
          LOG_DEBUG << "Skip spectrum " << it->getNativeID() << ": Precursor purity is below the threshold." << std::endl;
          continue;
        }

        // store RT&MZ of parent ion as centroid of ConsensusFeature
        ConsensusFeature cf;
        cf.setUniqueId();
        cf.setRT(it->getRT());
        cf.setMZ(it->getPrecursors()[0].getMZ());

        Peak2D channel_value;
        channel_value.setRT(it->getRT());
        // for each each channel
        UInt64 map_index = 0;
        Peak2D::IntensityType overall_intensity = 0;
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator cl_it = quant_method_->getChannelInformation().begin();
             cl_it != quant_method_->getChannelInformation().end();
             ++cl_it)
        {
          // set mz-position of channel
          channel_value.setMZ(cl_it->center);
          // reset intensity
          channel_value.setIntensity(0);

          // as every evaluation requires time, we cache the MZEnd iterator
          const MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_end = it->MZEnd(cl_it->center + reporter_mass_shift_);

          // add up all signals
          for (MSExperiment<Peak1D>::SpectrumType::ConstIterator mz_it = it->MZBegin(cl_it->center - reporter_mass_shift_);
               mz_it != mz_end;
               ++mz_it)
          {
            channel_value.setIntensity(channel_value.getIntensity() + mz_it->getIntensity());
          }

          // discard contribution of this channel as it is below the required intensity threshold
          if (channel_value.getIntensity() < min_reporter_intensity_)
          {
            channel_value.setIntensity(0);
          }

          overall_intensity += channel_value.getIntensity();
          // add channel to ConsensusFeature
          cf.insert(map_index++, channel_value, element_index);
        } // ! channel_iterator

        // check if we keep this feature or if it contains low-intensity quantifications
        if (remove_low_intensity_quantifications_ && hasLowIntensityReporter_(cf))
        {
          continue;
        }

        // check featureHandles are not empty
        if (overall_intensity == 0)
        {
          cf.setMetaValue("all_empty", String("true"));
        }
        cf.setIntensity(overall_intensity);
        consensus_map.push_back(cf);

        // the tandem-scan in the order they appear in the experiment
        ++element_index;
      }
    } // ! Experiment iterator

    /// add meta information to the map
    registerChannelsInOutputMap_(consensus_map);
  }

Example #15

Show file

File: IDFileConverter.cpp Project: BioinformaticsArchive/OpenMS

  ExitCodes
  main_(int, const char**)
  {
    //-------------------------------------------------------------
    // general variables and data
    //-------------------------------------------------------------
    FileHandler fh;
    vector<PeptideIdentification> peptide_identifications;
    vector<ProteinIdentification> protein_identifications;

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    const String in = getStringOption_("in");

    ProgressLogger logger;
    logger.setLogType(ProgressLogger::CMD);
    logger.startProgress(0, 1, "Loading...");

    if (File::isDirectory(in))
    {
      const String in_directory = File::absolutePath(in).ensureLastChar('/');
      const String mz_file = getStringOption_("mz_file");
      const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide");

      UInt i = 0;
      FileHandler fh;
      FileTypes::Type type;
      MSExperiment<Peak1D> msexperiment;
      // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;)  However, now String::toInt() might throw.
      map<Int, float> num_and_rt;
      vector<String> NativeID;

      // The mz-File (if given)
      if (!mz_file.empty())
      {
        type = fh.getTypeByFileName(mz_file);
        fh.loadExperiment(mz_file, msexperiment, type);

        for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it)
        {
          String(spectra_it->getNativeID()).split('=', NativeID);
          try
          {
            num_and_rt[NativeID[1].toInt()] = spectra_it->getRT();
            // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01
          }
          catch (Exception::ConversionError& e)
          {
            writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
          }
        }
      }

      // Get list of the actual Sequest .out-Files
      StringList in_files;
      if (!File::fileList(in_directory, String("*.out"), in_files))
      {
        writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!");
      }

      // Now get to work ...
      for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it)
      {
        vector<PeptideIdentification> peptide_ids_seq;
        ProteinIdentification protein_id_seq;
        vector<double> pvalues_seq;
        vector<String> in_file_vec;

        SequestOutfile sequest_outfile;

        writeDebug_(String("Reading file ") + *in_files_it, 3);

        try
        {
          sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide);

          in_files_it->split('.', in_file_vec);

          for (Size j = 0; j < peptide_ids_seq.size(); ++j)
          {

            // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files.
            peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i);

            Int scan_number = 0;
            if (!mz_file.empty())
            {
              try
              {
                scan_number = in_file_vec[2].toInt();
                peptide_ids_seq[j].setRT(num_and_rt[scan_number]);
              }
              catch (Exception::ConversionError& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
              }
              catch (exception& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.what());
              }
              //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? semantics of mz
              const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge();
              peptide_ids_seq[j].setMZ(real_mz);
            }

            writeDebug_(String("scan: ") + String(scan_number) + String("  RT: ") + String(peptide_ids_seq[j].getRT()) + "  MZ: " + String(peptide_ids_seq[j].getMZ()) + "  Ident: " + peptide_ids_seq[j].getIdentifier(), 4);

            peptide_identifications.push_back(peptide_ids_seq[j]);
          }

          protein_id_seq.setIdentifier(*in_files_it + "_" + i);
          protein_identifications.push_back(protein_id_seq);
          ++i;
        }
        catch (Exception::ParseError& pe)
        {
          writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")");
          throw;
        }
        catch (...)
        {
          writeLog_(String("Error reading file: ") + *in_files_it);
          throw;
        }
      }

      writeDebug_("All files processed.", 3);
    } // ! directory
    else
    {
      FileTypes::Type in_type = fh.getType(in);

      if (in_type == FileTypes::PEPXML)
      {
        String exp_name = getStringOption_("mz_file");
        String orig_name =  getStringOption_("mz_name");
        bool use_precursor_data = getFlag_("use_precursor_data");

        if (exp_name.empty())
        {
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, orig_name);
        }
        else
        {
          MSExperiment<> exp;
          fh.loadExperiment(exp_name, exp);
          if (!orig_name.empty())
          {
            exp_name = orig_name;
          }
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, exp_name, exp,
                            use_precursor_data);
        }
      }
      else if (in_type == FileTypes::IDXML)
      {
        IdXMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::MZIDENTML)
      {
        LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl;
        MzIdentMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::PROTXML)
      {
        protein_identifications.resize(1);
        peptide_identifications.resize(1);
        ProtXMLFile().load(in, protein_identifications[0],
                           peptide_identifications[0]);
      }
      else if (in_type == FileTypes::OMSSAXML)
      {
        protein_identifications.resize(1);
        OMSSAXMLFile().load(in, protein_identifications[0],
                            peptide_identifications, true);
      }
      else if (in_type == FileTypes::MASCOTXML)
      {
        String scan_regex = getStringOption_("scan_regex");
        String exp_name = getStringOption_("mz_file");
        MascotXMLFile::RTMapping rt_mapping;
        if (!exp_name.empty())
        {
          PeakMap exp;
          // load only MS2 spectra:
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);
        }
        protein_identifications.resize(1);
        MascotXMLFile().load(in, protein_identifications[0],
                             peptide_identifications, rt_mapping, scan_regex);
      }
      else if (in_type == FileTypes::XML)
      {
        ProteinIdentification protein_id;
        XTandemXMLFile().load(in, protein_id, peptide_identifications);
        protein_id.setSearchEngineVersion("");
        protein_id.setSearchEngine("XTandem");
        protein_identifications.push_back(protein_id);
        String exp_name = getStringOption_("mz_file");
        if (!exp_name.empty())
        {
          PeakMap exp;
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it)
          {
            UInt id = (Int)it->getMetaValue("spectrum_id");
            --id; // native IDs were written 1-based
            if (id < exp.size())
            {
              it->setRT(exp[id].getRT());
              double pre_mz(0.0);
              if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
              it->setMZ(pre_mz);
              it->removeMetaValue("spectrum_id");
            }
            else
            {
              LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" << endl;
            }
          }
        }
      }
      else
      {
        writeLog_("Unknown input file type given. Aborting!");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
    }
    logger.endProgress();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    const String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }
    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    logger.startProgress(0, 1, "Storing...");
    if (out_type == FileTypes::PEPXML)
    {
      bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed");
      String mz_file = getStringOption_("mz_file");
      String mz_name = getStringOption_("mz_name");
      PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed);
    }
    else if (out_type == FileTypes::IDXML)
    {
      IdXMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::MZIDENTML)
    {
      MzIdentMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::FASTA)
    {
      Size count = 0;
      ofstream fasta(out.c_str(), ios::out);
      for (Size i = 0; i < peptide_identifications.size(); ++i)
      {
        for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l)
        {
          const PeptideHit& hit = peptide_identifications[i].getHits()[l];
          fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++
                << "|" << hit.getSequence().toString() << endl;
          String seq = hit.getSequence().toUnmodifiedString();
          // FASTA files should have at most 60 characters of sequence info per line
          for (Size j = 0; j < seq.size(); j += 60)
          {
            Size k = min(j + 60, seq.size());
            fasta << string(seq[j], seq[k]) << endl;
          }
        }
      }
    }
    else
    {
      writeLog_("Unsupported output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }
    logger.endProgress();


    return EXECUTION_OK;
  }

Example #16

Show file

File: QCCalculator.cpp Project: BioinformaticsArchive/OpenMS

  ExitCodes main_(int, const char**)
  {
    vector<ProteinIdentification> prot_ids;
    vector<PeptideIdentification> pep_ids;
    ProteinHit temp_protein_hit;

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String inputfile_id               = getStringOption_("id");
    String inputfile_feature       = getStringOption_("feature");
    String inputfile_consensus  = getStringOption_("consensus");
    String inputfile_raw            = getStringOption_("in");
    String outputfile_name       = getStringOption_("out");

    //~ bool Ms1(getFlag_("MS1"));
    //~ bool Ms2(getFlag_("MS2"));
    bool remove_duplicate_features(getFlag_("remove_duplicate_features"));
    
    //-------------------------------------------------------------
    // fetch vocabularies
    //------------------------------------------------------------
    ControlledVocabulary cv;
    cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
    cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo"));
 
     QcMLFile qcmlfile;

    //-------------------------------------------------------------
    // MS  aqiusition
    //------------------------------------------------------------
    String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName();

    cout << "Reading mzML file..." << endl;
    MzMLFile mz_data_file;
    MSExperiment<Peak1D> exp;
    MzMLFile().load(inputfile_raw, exp);
    
    //---prep input
    exp.sortSpectra();
    UInt min_mz = std::numeric_limits<UInt>::max();
    UInt max_mz = 0;
    std::map<Size, UInt> mslevelcounts;
    
    qcmlfile.registerRun(base_name,base_name); //TODO use UIDs
    
    //---base MS aquisition qp
    String msaq_ref = base_name + "_msaq";
    QcMLFile::QualityParameter qp;
    qp.id = msaq_ref; ///< Identifier
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000004";
    try
    {
      //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details");
      //~ cout << test.name << test.id << endl;
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004");
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "mzML file"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    //---file origin qp
    qp = QcMLFile::QualityParameter();
    qp.name = "mzML file"; ///< Name
    qp.id = base_name + "_run_name"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000577";
    qp.value = base_name;
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    qp = QcMLFile::QualityParameter();
    qp.name = "instrument model"; ///< Name
    qp.id = base_name + "_instrument_name"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000031";
    qp.value = exp.getInstrument().getName();
    qcmlfile.addRunQualityParameter(base_name, qp);    

    qp = QcMLFile::QualityParameter();
    qp.name = "completion time"; ///< Name
    qp.id = base_name + "_date"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000747";
    qp.value = exp.getDateTime().getDate();
    qcmlfile.addRunQualityParameter(base_name, qp);

    //---precursors at
    QcMLFile::Attachment at;
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000044";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_precursors"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "precursors"; ///< Name
    }

    at.colTypes.push_back("MS:1000894_[sec]"); //RT
    at.colTypes.push_back("MS:1000040"); //MZ
    for (Size i = 0; i < exp.size(); ++i)
    {
      mslevelcounts[exp[i].getMSLevel()]++;
      if (exp[i].getMSLevel() == 2)
      {
        if (exp[i].getPrecursors().front().getMZ() < min_mz)
        {
          min_mz = exp[i].getPrecursors().front().getMZ();
        }
        if (exp[i].getPrecursors().front().getMZ() > max_mz)
        {
          max_mz = exp[i].getPrecursors().front().getMZ();
        }
        std::vector<String> row;
        row.push_back(exp[i].getRT());
        row.push_back(exp[i].getPrecursors().front().getMZ());
        at.tableRows.push_back(row);
      }
    }
    qcmlfile.addRunAttachment(base_name, at);

    //---aquisition results qp
    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_ms1aquisition"; ///< Identifier
    qp.value = String(mslevelcounts[1]);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of ms1 spectra"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    

    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_ms2aquisition"; ///< Identifier
    qp.value = String(mslevelcounts[2]);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of ms2 spectra"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);

    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_Chromaquisition"; ///< Identifier
    qp.value = String(exp.getChromatograms().size());
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of chromatograms"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000009";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_mzrange"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS MZ aquisition ranges"; ///< Name
    }

    at.colTypes.push_back("QC:0000010"); //MZ
    at.colTypes.push_back("QC:0000011"); //MZ
    std::vector<String> rowmz;
    rowmz.push_back(String(min_mz));
    rowmz.push_back(String(max_mz));
    at.tableRows.push_back(rowmz);
    qcmlfile.addRunAttachment(base_name, at);

    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000012";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_rtrange"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS RT aquisition ranges"; ///< Name
    }

    at.colTypes.push_back("QC:0000013"); //MZ
    at.colTypes.push_back("QC:0000014"); //MZ
    std::vector<String> rowrt;
    rowrt.push_back(String(exp.begin()->getRT()));
    rowrt.push_back(String(exp.getSpectra().back().getRT()));
    at.tableRows.push_back(rowrt);
    qcmlfile.addRunAttachment(base_name, at);
    

    //---ion current stability ( & tic ) qp
    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000022";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_tics"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS TICs"; ///< Name
    }
    
    at.colTypes.push_back("MS:1000894_[sec]");
    at.colTypes.push_back("MS:1000285");
    UInt max = 0;
    Size below_10k = 0;
    for (Size i = 0; i < exp.size(); ++i)
    {
      if (exp[i].getMSLevel() == 1)
      {
        UInt sum = 0;
        for (Size j = 0; j < exp[i].size(); ++j)
        {
          sum += exp[i][j].getIntensity();
        }
        if (sum > max)
        {
          max = sum;
        }
        if (sum < 10000)
        {
          ++below_10k;
        }
        std::vector<String> row;
        row.push_back(exp[i].getRT());
        row.push_back(sum);
        at.tableRows.push_back(row);
      }
    }
    qcmlfile.addRunAttachment(base_name, at);
    

    qp = QcMLFile::QualityParameter();
    qp.id = base_name + "_ticslump"; ///< Identifier
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000023";
    qp.value = String((100 / exp.size()) * below_10k);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "percentage of tic slumps"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);

    
    //-------------------------------------------------------------
    // MS  id
    //------------------------------------------------------------
    if (inputfile_id != "")
    {
      IdXMLFile().load(inputfile_id, prot_ids, pep_ids);
      cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl;

      ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters();
      vector<String> var_mods = params.variable_modifications;
      //~ boost::regex re("(?<=[KR])(?=[^P])");
     
      String msid_ref = base_name + "_msid";
      QcMLFile::QualityParameter qp;
      qp.id = msid_ref; ///< Identifier
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000025";
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "MS identification result details"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000026";
      at.qualityRef = msid_ref;
      at.id = base_name + "_idsetting"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "MS id settings"; ///< Name
      }
      
      at.colTypes.push_back("MS:1001013"); //MS:1001013 db name  MS:1001016 version  MS:1001020 taxonomy
      at.colTypes.push_back("MS:1001016");
      at.colTypes.push_back("MS:1001020");
      std::vector<String> row;
      row.push_back(String(prot_ids.front().getSearchParameters().db));
      row.push_back(String(prot_ids.front().getSearchParameters().db_version));
      row.push_back(String(prot_ids.front().getSearchParameters().taxonomy));
      at.tableRows.push_back(row);
      qcmlfile.addRunAttachment(base_name, at);


      UInt spectrum_count = 0;
      Size peptide_hit_count = 0;
      UInt runs_count = 0;
      Size protein_hit_count = 0;
      set<String> peptides;
      set<String> proteins;
      Size missedcleavages = 0;
      for (Size i = 0; i < pep_ids.size(); ++i)
      {
        if (!pep_ids[i].empty())
        {
          ++spectrum_count;
          peptide_hit_count += pep_ids[i].getHits().size();
          const vector<PeptideHit>& temp_hits = pep_ids[i].getHits();
          for (Size j = 0; j < temp_hits.size(); ++j)
          {
            peptides.insert(temp_hits[j].getSequence().toString());
          }
        }
      }
      for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it)
      {
        for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st)
        {
          if (*st == 'K' || *st == 'R')
          {
            ++missedcleavages;
          }
        }
      }

      for (Size i = 0; i < prot_ids.size(); ++i)
      {
        ++runs_count;
        protein_hit_count += prot_ids[i].getHits().size();
        const vector<ProteinHit>& temp_hits = prot_ids[i].getHits();
        for (Size j = 0; j < temp_hits.size(); ++j)
        {
          proteins.insert(temp_hits[j].getAccession());
        }
      }
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000037"; ///< cv accession
      qp.id = base_name + "_misscleave"; ///< Identifier
      qp.value = missedcleavages;
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "total number of missed cleavages"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);

      
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000032"; ///< cv accession
      qp.id = base_name + "_totprot"; ///< Identifier
      qp.value = protein_hit_count;
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "total number of identified proteins"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000033"; ///< cv accession
      qp.id = base_name + "_totuniqprot"; ///< Identifier
      qp.value = String(proteins.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of uniquely identified proteins"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000029"; ///< cv accession
      qp.id = base_name + "_psms"; ///< Identifier
      qp.value = String(spectrum_count);
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of PSM"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000030"; ///< cv accession
      qp.id = base_name + "_totpeps"; ///< Identifier
      qp.value = String(peptide_hit_count);
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of identified peptides"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000031"; ///< cv accession
      qp.id = base_name + "_totuniqpeps"; ///< Identifier
      qp.value = String(peptides.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of uniquely identified peptides"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000038";
      at.qualityRef = msid_ref;
      at.id = base_name + "_massacc"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "delta ppm tables";
      }
      
      //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M)
      at.colTypes.push_back("RT");
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("Score");
      at.colTypes.push_back("PeptideSequence");
      at.colTypes.push_back("Charge");
      at.colTypes.push_back("TheoreticalWeight");
      at.colTypes.push_back("delta_ppm");
      for (UInt w = 0; w < var_mods.size(); ++w)
      {
        at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_'));
      }

      std::vector<double> deltas;
      //~ prot_ids[0].getSearchParameters();
      for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it)
      {
        if (it->getHits().size() > 0)
        {
          std::vector<String> row;
          row.push_back(it->getRT());
          row.push_back(it->getMZ());
          PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort
          vector<UInt> pep_mods;
          for (UInt w = 0; w < var_mods.size(); ++w)
          {
            pep_mods.push_back(0);
          }
          for (AASequence::ConstIterator z =  tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z)
          {
            Residue res = *z;
            String temp;
            if (res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl")
            {
              temp = res.getModification() + " (" + res.getOneLetterCode()  + ")";
              //cout<<res.getModification()<<endl;
              for (UInt w = 0; w < var_mods.size(); ++w)
              {
                if (temp == var_mods[w])
                {
                  //cout<<temp;
                  pep_mods[w] += 1;
                }
              }
            }
          }
          row.push_back(tmp.getScore());
          row.push_back(tmp.getSequence().toString().removeWhitespaces());
          row.push_back(tmp.getCharge());
          row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()));
          double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true));
          row.push_back(String(dppm));
          deltas.push_back(dppm);
          for (UInt w = 0; w < var_mods.size(); ++w)
          {
            row.push_back(pep_mods[w]);
          }
          at.tableRows.push_back(row);
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
      

      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000040"; ///< cv accession
      qp.id = base_name + "_mean_delta"; ///< Identifier
      qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end()));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "mean delta ppm"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000041"; ///< cv accession
      qp.id = base_name + "_median_delta"; ///< Identifier
      qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "median delta ppm"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000035"; ///< cv accession
      qp.id = base_name + "_ratio_id"; ///< Identifier
      qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2]));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "id ratio"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);
    }

    //-------------------------------------------------------------
    // MS quantitation
    //------------------------------------------------------------
    FeatureMap map;
    String msqu_ref = base_name + "_msqu";
    if (inputfile_feature != "")
    {
      FeatureXMLFile f;
      f.load(inputfile_feature, map);

      cout << "Read featureXML file..." << endl;

      //~ UInt fiter = 0;
      map.sortByRT();
      map.updateRanges();

      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000045"; ///< cv accession
      qp.id = msqu_ref; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "MS quantification result details"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);
      
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000046"; ///< cv accession
      qp.id = base_name + "_feature_count"; ///< Identifier
      qp.value = String(map.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "number of features"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);      
    }

    if (inputfile_feature != "" && !remove_duplicate_features)
    {
      
      QcMLFile::Attachment at;
      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000047";
      at.qualityRef = msqu_ref;
      at.id = base_name + "_features"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "features"; ///< Name
      }
      
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("RT");
      at.colTypes.push_back("Intensity");
      at.colTypes.push_back("Charge");
      at.colTypes.push_back("Quality");
      at.colTypes.push_back("FWHM");
      at.colTypes.push_back("IDs");
      UInt fiter = 0;
      map.sortByRT();
      //ofstream out(outputfile_name.c_str());
      while (fiter < map.size())
      {
        std::vector<String> row;
        row.push_back(map[fiter].getMZ());
        row.push_back(map[fiter].getRT());
        row.push_back(map[fiter].getIntensity());
        row.push_back(map[fiter].getCharge());
        row.push_back(map[fiter].getOverallQuality());
        row.push_back(map[fiter].getWidth());
        row.push_back(map[fiter].getPeptideIdentifications().size());
        fiter++;
        at.tableRows.push_back(row);
      }     
      qcmlfile.addRunAttachment(base_name, at);
    }
    else if (inputfile_feature != "" && remove_duplicate_features)
    {
      QcMLFile::Attachment at;
      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000047";
      at.qualityRef = msqu_ref;
      at.id = base_name + "_features"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "features"; ///< Name
      }
      
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("RT");
      at.colTypes.push_back("Intensity");
      at.colTypes.push_back("Charge");
      FeatureMap map, map_out;
      FeatureXMLFile f;
      f.load(inputfile_feature, map);
      UInt fiter = 0;
      map.sortByRT();
      while (fiter < map.size())
      {
        FeatureMap map_tmp;
        for (UInt k = fiter; k <= map.size(); ++k)
        {
          if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1)
          {
            //~ cout << fiter << endl;
            map_tmp.push_back(map[k]);
          }
          else
          {
            fiter = k;
            break;
          }
        }
        map_tmp.sortByMZ();
        UInt retif = 1;
        map_out.push_back(map_tmp[0]);
        while (retif < map_tmp.size())
        {
          if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01)
          {
            cout << "equal RT, but mass different" << endl;
            map_out.push_back(map_tmp[retif]);
          }
          retif++;
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
    }
    if (inputfile_consensus != "")
    {
      cout << "Reading consensusXML file..." << endl;
      ConsensusXMLFile f;
      ConsensusMap map;
      f.load(inputfile_consensus, map);
      //~ String CONSENSUS_NAME = "_consensus.tsv";
      //~ String combined_out = outputfile_name + CONSENSUS_NAME;
      //~ ofstream out(combined_out.c_str());

      at = QcMLFile::Attachment();
      qp.name = "consensuspoints"; ///< Name
      //~ qp.id = base_name + "_consensuses"; ///< Identifier
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results"

      at.colTypes.push_back("Native_spectrum_ID");
      at.colTypes.push_back("DECON_RT_(sec)");
      at.colTypes.push_back("DECON_MZ_(Th)");
      at.colTypes.push_back("DECON_Intensity");
      at.colTypes.push_back("Feature_RT_(sec)");
      at.colTypes.push_back("Feature_MZ_(Th)");
      at.colTypes.push_back("Feature_Intensity");
      at.colTypes.push_back("Feature_Charge");
      for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit)
      {
        const ConsensusFeature& CF = *cmit;
        for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit)
        {
          std::vector<String> row;
          FeatureHandle FH = *cfit;
          row.push_back(CF.getMetaValue("spectrum_native_id"));
          row.push_back(CF.getRT()); row.push_back(CF.getMZ());
          row.push_back(CF.getIntensity());
          row.push_back(FH.getRT());
          row.push_back(FH.getMZ());
          row.push_back(FH.getCharge());
          at.tableRows.push_back(row);
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
    }
    
    
    //-------------------------------------------------------------
    // finalize
    //------------------------------------------------------------
    qcmlfile.store(outputfile_name);
    return EXECUTION_OK;
  }

Example #17

Show file

File: FileMerger.cpp Project: chahuistle/OpenMS

  ExitCodes main_(int, const char**)
  {

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    // file list
    StringList file_list = getStringList_("in");

    // file type
    FileHandler file_handler;
    FileTypes::Type force_type;
    if (getStringOption_("in_type").size() > 0)
    {
      force_type = FileTypes::nameToType(getStringOption_("in_type"));
    }
    else
    {
      force_type = file_handler.getType(file_list[0]);
    }

    // output file names and types
    String out_file = getStringOption_("out");

    bool annotate_file_origin =  getFlag_("annotate_file_origin");
    rt_gap_ = getDoubleOption_("rt_concat:gap");
    vector<String> trafo_out = getStringList_("rt_concat:trafo_out");
    if (trafo_out.empty())
    {
      // resize now so we don't have to worry about indexing out of bounds:
      trafo_out.resize(file_list.size());
    }
    else if (trafo_out.size() != file_list.size())
    {
      writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!");
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    if (force_type == FileTypes::FEATUREXML)
    {
      FeatureMap out;
      FeatureXMLFile fh;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        FeatureMap map;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }

        if (rt_gap_ > 0.0) // concatenate in RT
        {
          adjustRetentionTimes_(map, trafo_out[i], i == 0);
        }

        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      fh.store(out_file, out);
    }

    else if (force_type == FileTypes::CONSENSUSXML)
    {
      ConsensusMap out;
      ConsensusXMLFile fh;
      fh.load(file_list[0], out);
      // skip first file
      for (Size i = 1; i < file_list.size(); ++i)
      {
        ConsensusMap map;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }

        if (rt_gap_ > 0.0) // concatenate in RT
        {
          adjustRetentionTimes_(map, trafo_out[i], i == 0);
        }

        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      fh.store(out_file, out);
    }

    else if (force_type == FileTypes::TRAML)
    {
      TargetedExperiment out;
      TraMLFile fh;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        TargetedExperiment map;
        fh.load(file_list[i], map);
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      Software software;
      software.setName("FileMerger");
      software.setVersion(VersionInfo::getVersion());
      out.addSoftware(software);

      fh.store(out_file, out);
    }
    else // raw data input (e.g. mzML)
    {
      // RT
      bool rt_auto_number = getFlag_("raw:rt_auto");
      bool rt_filename = getFlag_("raw:rt_filename");
      bool rt_custom = false;
      DoubleList custom_rts = getDoubleList_("raw:rt_custom");
      if (!custom_rts.empty())
      {
        rt_custom = true;
        if (custom_rts.size() != file_list.size())
        {
          writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!");
          return ILLEGAL_PARAMETERS;
        }
      }

      // MS level
      Int ms_level = getIntOption_("raw:ms_level");

      MSExperiment<> out;
      UInt rt_auto = 0;
      UInt native_id = 0;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        String filename = file_list[i];

        // load file
        force_type = file_handler.getType(file_list[i]);
        MSExperiment<> in;
        file_handler.loadExperiment(filename, in, force_type, log_type_);

        if (in.empty() && in.getChromatograms().empty())
        {
          writeLog_(String("Warning: Empty file '") + filename + "'!");
          continue;
        }
        out.reserve(out.size() + in.size());

        // warn if custom RT and more than one scan in input file
        if (rt_custom && in.size() > 1)
        {
          writeLog_(String("Warning: More than one scan in file '") + filename + "'! All scans will have the same retention time!");
        }

        // handle special raw data options:
        for (MSExperiment<>::iterator spec_it = in.begin();
             spec_it != in.end(); ++spec_it)
        {
          float rt_final = spec_it->getRT();
          if (rt_auto_number)
          {
            rt_final = ++rt_auto;
          }
          else if (rt_custom)
          {
            rt_final = custom_rts[i];
          }
          else if (rt_filename)
          {
            static const boost::regex re("rt(\\d+(\\.\\d+)?)");
            boost::smatch match;
            bool found = boost::regex_search(filename, match, re);
            if (found)
            {
              rt_final = String(match[1]).toFloat();
            }
            else
            {
              writeLog_("Warning: could not extract retention time from filename '" + filename + "'");
            }
          }

          // none of the rt methods were successful
          if (rt_final < 0)
          {
            writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'");
          }

          spec_it->setRT(rt_final);
          spec_it->setNativeID("spectrum=" + String(native_id));
          if (ms_level > 0)
          {
            spec_it->setMSLevel(ms_level);
          }
          ++native_id;
        }

        // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles)
        if (in.size() == 1)
        {
          in[0].setSourceFile(in.getSourceFiles()[0]);
          in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways)
        }

        if (rt_gap_ > 0.0) // concatenate in RT
        {
          adjustRetentionTimes_(in, trafo_out[i], i == 0);
        }

        // add spectra to output
        for (MSExperiment<>::const_iterator spec_it = in.begin();
             spec_it != in.end(); ++spec_it)
        {
          out.addSpectrum(*spec_it);
        }
        // also add the chromatograms
        for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator
               chrom_it = in.getChromatograms().begin(); chrom_it != 
               in.getChromatograms().end(); ++chrom_it)
        {
          out.addChromatogram(*chrom_it);
        }

        // copy experimental settings from first file
        if (i == 0)
        {
          out.ExperimentalSettings::operator=(in);
        }
        else // otherwise append
        {
          out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then
        }
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      // annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      MzMLFile f;
      f.setLogType(log_type_);
      f.store(out_file, out);
    }

    return EXECUTION_OK;
  }

Example #18

Show file

File: FileMerger.C Project: aiche/open-ms-mirror

  ExitCodes main_(int, const char**)
  {

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    //file list
    StringList file_list = getStringList_("in");

    //file type
    FileHandler fh;
    FileTypes::Type force_type;
    if (getStringOption_("in_type").size() > 0)
    {
      force_type = FileTypes::nameToType(getStringOption_("in_type"));
    }
    else
    {
      force_type = fh.getType(file_list[0]);
    }

    //output file names and types
    String out_file = getStringOption_("out");

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    bool annotate_file_origin =  getFlag_("annotate_file_origin");

    if (force_type == FileTypes::FEATUREXML)
    {
      FeatureMap<> out;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        FeatureMap<> map;
        FeatureXMLFile fh;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      FeatureXMLFile f;
      f.store(out_file, out);

    }
    else if (force_type == FileTypes::CONSENSUSXML)
    {
      ConsensusMap out;
      ConsensusXMLFile fh;
      fh.load(file_list[0], out);
      //skip first file
      for (Size i = 1; i < file_list.size(); ++i)
      {
        ConsensusMap map;
        ConsensusXMLFile fh;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      ConsensusXMLFile f;
      f.store(out_file, out);
    }
    else if (force_type == FileTypes::TRAML)
    {
      TargetedExperiment out;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        TargetedExperiment map;
        TraMLFile fh;
        fh.load(file_list[i], map);
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      Software software;
      software.setName("FileMerger");
      software.setVersion(VersionInfo::getVersion());
      out.addSoftware(software);

      TraMLFile f;
      f.store(out_file, out);
    }
    else
    {
      // we might want to combine different types, thus we only
      // query in_type (which applies to all files)
      // and not the suffix or content of a single file
      force_type = FileTypes::nameToType(getStringOption_("in_type"));

      //rt
      bool rt_auto_number = getFlag_("raw:rt_auto");
      bool rt_filename = getFlag_("raw:rt_filename");
      bool rt_custom = false;
      DoubleList custom_rts = getDoubleList_("raw:rt_custom");
      if (custom_rts.size() != 0)
      {
        rt_custom = true;
        if (custom_rts.size() != file_list.size())
        {
          writeLog_("Custom retention time list must have as many elements as there are input files!");
          printUsage_();
          return ILLEGAL_PARAMETERS;
        }
      }

      //ms level
      bool user_ms_level = getFlag_("raw:user_ms_level");

      MSExperiment<> out;
      out.reserve(file_list.size());
      UInt rt_auto = 0;
      UInt native_id = 0;
      std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        String filename = file_list[i];

        //load file
        MSExperiment<> in;
        fh.loadExperiment(filename, in, force_type, log_type_);
        if (in.empty() && in.getChromatograms().empty())
        {
          writeLog_(String("Warning: Empty file '") + filename + "'!");
          continue;
        }
        out.reserve(out.size() + in.size());

        //warn if custom RT and more than one scan in input file
        if (rt_custom && in.size() > 1)
        {
          writeLog_(String("Warning: More than one scan in file '") + filename + "'! All scans will have the same retention time!");
        }

        for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2)
        {
          //handle rt
          Real rt_final = it2->getRT();
          if (rt_auto_number)
          {
            rt_final = ++rt_auto;
          }
          else if (rt_custom)
          {
            rt_final = custom_rts[i];
          }
          else if (rt_filename)
          {
            if (!filename.hasSubstring("rt"))
            {
              writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'"));
            }
            for (Size i = 0; i < filename.size(); ++i)
            {
              if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i]))
              {
                String rt;
                while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i])))
                {
                  rt += filename[i++];
                }
                if (rt.size() > 0)
                {
                  // remove dot from rt3892.98.dta
                  //                          ^
                  if (rt[rt.size() - 1] == '.')
                  {
                    // remove last character
                    rt.erase(rt.end() - 1);
                  }
                }
                try
                {
                  float tmp = rt.toFloat();
                  rt_final = tmp;
                }
                catch (Exception::ConversionError)
                {
                  writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'."));
                }
              }
            }
          }

          // none of the rt methods were successful
          if (rt_final == -1)
          {
            writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'");
          }

          out.addSpectrum(*it2);
          out.getSpectra().back().setRT(rt_final);
          out.getSpectra().back().setNativeID(native_id);

          if (user_ms_level)
          {
            out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level"));
          }
          ++native_id;
        }

        // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles)
        if (in.size() == 1)
        {
          out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]);
          in.getSourceFiles().clear();   // delete source file annotated from source file (its in the spectrum anyways)
        }
        // copy experimental settings from first file
        if (i == 0)
        {
          out.ExperimentalSettings::operator=(in);
        }
        else // otherwise append
        {
          out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then
        }

        // also add the chromatograms
        for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2)
        {
          all_chromatograms.push_back(*it2);
        }

      }
      // set the chromatograms
      out.setChromatograms(all_chromatograms);

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      MzMLFile f;
      f.setLogType(log_type_);
      f.store(out_file, out);

    }

    return EXECUTION_OK;
  }

Example #19

Show file

File: DTA2DFile_test.cpp Project: chahuistle/OpenMS

  TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()),"dta2d");

	TEST_EQUAL(e.size(), 9);
	ABORT_IF(e.size() != 9)

	TEST_STRING_EQUAL(e[0].getNativeID(),"index=0")
	TEST_STRING_EQUAL(e[1].getNativeID(),"index=1")
	TEST_STRING_EQUAL(e[2].getNativeID(),"index=2")
	TEST_STRING_EQUAL(e[3].getNativeID(),"index=3")
	TEST_STRING_EQUAL(e[4].getNativeID(),"index=4")
	TEST_STRING_EQUAL(e[5].getNativeID(),"index=5")
	TEST_STRING_EQUAL(e[6].getNativeID(),"index=6")
	TEST_STRING_EQUAL(e[7].getNativeID(),"index=7")
	TEST_STRING_EQUAL(e[8].getNativeID(),"index=8")

	MSExperiment<>::const_iterator it(e.begin());
	TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 230.02)
	TEST_REAL_SIMILAR(it->getRT(), 4711.1)
	TEST_REAL_SIMILAR((*it)[0].getIntensity(), 47218.89)
	++it;

	TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 231.51)
	TEST_REAL_SIMILAR(it->getRT(), 4711.2)
	TEST_REAL_SIMILAR((*it)[0].getIntensity(), 89935.22)
	++it;

	TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 139.42)
	TEST_REAL_SIMILAR(it->getRT(), 4711.3)
	TEST_REAL_SIMILAR((*it)[0].getIntensity(), 318.52)
	++it;