Esempio n. 1
0
 // Runs filterSpectrum() on each spectrum of exp, in order.
 void Normalizer::filterPeakMap(PeakMap & exp)
 {
   PeakMap::Iterator spectrum = exp.begin();
   while (spectrum != exp.end())
   {
     filterSpectrum(*spectrum);
     ++spectrum;
   }
 }
Esempio n. 2
0
// Runs getIdentification() for the spectra of exp and appends the resulting
// PeptideIdentification objects to pep_ids.
//
// @param pep_ids output vector; results are appended, existing entries are kept
// @param exp     spectra to identify
//
// The RT of each identification is taken from the spectrum, the m/z from the
// first precursor of the spectrum. Spectra without any precursor are skipped.
void CompNovoIdentificationCID::getIdentifications(vector<PeptideIdentification> & pep_ids, const PeakMap & exp)
{
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
        // guard: dereferencing begin() of an empty precursor list is undefined behavior
        if (it->getPrecursors().empty())
        {
            continue;
        }

        PeptideIdentification id;
        // TODO check if both CID and ETD is present;
        PeakSpectrum CID_spec(*it);
        id.setRT(it->getRT());
        id.setMZ(it->getPrecursors().begin()->getMZ());

        // reset the member caches before each spectrum is processed
        subspec_to_sequences_.clear();
        permute_cache_.clear();
        decomp_cache_.clear();

        getIdentification(id, CID_spec);
        pep_ids.push_back(id);
    }
}
Esempio n. 3
0
  // Collects the peaks of the area [begin, end) into a temporary PeakMap
  // (one spectrum per run of peaks sharing the same RT) and forwards that map
  // to run(map, found_masstraces). Note that begin is advanced up to end.
  void MassTraceDetection::run(PeakMap::ConstAreaIterator& begin,
                               PeakMap::ConstAreaIterator& end,
                               std::vector<MassTrace>& found_masstraces)
  {
    // an empty area yields no mass traces
    if (begin == end)
    {
      return;
    }

    PeakMap collected;
    MSSpectrum<Peak1D> pending;

    while (begin != end)
    {
      const bool starts_new_spectrum = (begin.getRT() != pending.getRT());
      if (starts_new_spectrum)
      {
        // An RT of -1 marks the accumulator as not yet used
        // (presumably the RT of a freshly constructed spectrum -- confirm).
        if (pending.getRT() != -1)
        {
          collected.addSpectrum(pending);
        }
        pending.clear(false);
        pending.setRT(begin.getRT());
      }
      pending.push_back(*begin);
      ++begin;
    }
    // the last accumulated spectrum has not been stored yet
    collected.addSpectrum(pending);

    run(collected, found_masstraces);
  }
Esempio n. 4
0
 // Passes every spectrum of exp to filterSpectrum().
 void ParentPeakMower::filterPeakMap(PeakMap & exp)
 {
   PeakMap::Iterator current = exp.begin();
   while (current != exp.end())
   {
     filterSpectrum(*current);
     ++current;
   }
 }
 // lists of peptide hits in "maps" will be sorted
 // Feeds the peptide identifications of every spectrum in experiment to the
 // overload of getRetentionTimes_() taking identifications, which fills rt_data.
 // Always returns false.
 bool MapAlignmentAlgorithmIdentification::getRetentionTimes_(
   PeakMap& experiment, SeqToList& rt_data)
 {
   PeakMap::Iterator spectrum = experiment.begin();
   while (spectrum != experiment.end())
   {
     getRetentionTimes_(spectrum->getPeptideIdentifications(), rt_data);
     ++spectrum;
   }
   // duplicate annotations are not expected here, so nothing has to be removed
   return false;
 }
Esempio n. 6
0
  void MzMLFile::load(const String& filename, PeakMap& map)
  {
    map.reset();

    //set DocumentIdentifier
    map.setLoadedFileType(filename);
    map.setLoadedFilePath(filename);

    Internal::MzMLHandler handler(map, filename, getVersion(), *this);
    handler.setOptions(options_);
    safeParse_(filename, &handler);
  }
Esempio n. 7
0
 // Filters every spectrum of exp with the window strategy selected by the
 // "movetype" parameter: "slide" uses the sliding window filter, any other
 // value uses the jumping window filter.
 void WindowMower::filterPeakMap(PeakMap & exp)
 {
   const bool use_sliding_window = ((String)param_.getValue("movetype") == "slide");

   // the strategy is fixed for the whole map, so decide once outside the loops
   if (use_sliding_window)
   {
     for (PeakMap::Iterator spectrum = exp.begin(); spectrum != exp.end(); ++spectrum)
     {
       filterPeakSpectrumForTopNInSlidingWindow(*spectrum);
     }
   }
   else
   {
     for (PeakMap::Iterator spectrum = exp.begin(); spectrum != exp.end(); ++spectrum)
     {
       filterPeakSpectrumForTopNInJumpingWindow(*spectrum);
     }
   }
 }
  // Tool entry point: loads spectra and identifications, maps the
  // identifications onto the spectra and trains the SVM spectrum generator
  // model with the sequence of the first hit of each spectrum.
  // Returns EXECUTION_OK when training has been started and finished.
  ExitCodes main_(int, const char **)
  {
    // ---- command line options ----
    String spectra_file = getStringOption_("in_spectra");
    String identifications_file = getStringOption_("in_identifications");
    String model_file = getStringOption_("model_output_file");
    Int charge = getIntOption_("precursor_charge");

    // ---- set up the trainer ----
    SvmTheoreticalSpectrumGeneratorTrainer trainer;

    Param algorithm_param = getParam_().copy("algorithm:", true);
    String write_files;
    if (getFlag_("write_training_files"))
    {
      write_files = "true";
    }
    else
    {
      write_files = "false";
    }
    algorithm_param.setValue("write_training_files", write_files);
    trainer.setParameters(algorithm_param);

    // ---- load spectra and identifications ----
    PeakMap map;
    MzMLFile().load(spectra_file, map);

    std::vector<PeptideIdentification> pep_ids;
    std::vector<ProteinIdentification> prot_ids;
    String document_id;
    IdXMLFile().load(identifications_file, prot_ids, pep_ids, document_id);

    // ---- attach the identifications to the spectra (tight RT / m/z tolerances) ----
    IDMapper idmapper;
    Param mapper_param;
    mapper_param.setValue("rt_tolerance", 0.001);
    mapper_param.setValue("mz_tolerance", 0.001);
    idmapper.setParameters(mapper_param);
    idmapper.annotate(map, pep_ids, prot_ids);

    // ---- one annotation per spectrum: sequence of the first hit of the first identification ----
    // NOTE(review): the [0] accesses are unchecked; a spectrum without an
    // identification or without hits would be accessed out of range -- confirm
    // that the input guarantees an annotation for every spectrum.
    std::vector<AASequence> annotations;
    PeakMap::iterator spectrum = map.begin();
    while (spectrum != map.end())
    {
      annotations.push_back(spectrum->getPeptideIdentifications()[0].getHits()[0].getSequence());
      ++spectrum;
    }

    trainer.trainModel(map, annotations, model_file, charge);
    return EXECUTION_OK;
  }
  // Tool entry point: loads an mzML file, applies the BernNorm filter with the
  // "algorithm:" parameters and stores the filtered data as mzML again.
  ExitCodes main_(int, const char **) override
  {
    // ---- options: input / output file ----
    String input_file(getStringOption_("in"));
    String output_file(getStringOption_("out"));

    // ---- load the input ----
    PeakMap spectra;
    MzMLFile mzml_io;
    mzml_io.setLogType(log_type_);
    mzml_io.load(input_file, spectra);

    // ---- meta data arrays are removed; tell the user if there were any ----
    if (spectra.clearMetaDataArrays())
    {
      writeLog_("Warning: Spectrum meta data arrays cannot be sorted. They are deleted.");
    }

    // ---- filter ----
    Param algorithm_param = getParam_().copy("algorithm:", true);
    writeDebug_("Used filter parameters", algorithm_param, 3);

    BernNorm bern_norm;
    bern_norm.setParameters(algorithm_param);
    bern_norm.filterPeakMap(spectra);

    // ---- write the output, annotated with the data processing step ----
    addDataProcessing_(spectra, getProcessingInfo_(DataProcessing::FILTERING));
    mzml_io.store(output_file, spectra);

    return EXECUTION_OK;
  }
Esempio n. 10
0
// Test helper: loads the test file "MzMLFile_1.mzML" into exp, writes a memory
// dump of it to tmp_filename and builds the dump index for that file.
//
// @param tmp_filename passed to NEW_TMP_FILE (presumably receives a fresh
//                     temporary file name -- confirm against the macro) and then
//                     used as the location of the memory dump
// @param exp          receives the loaded experiment
// @return the CachedmzML object used to write and index the dump
CachedmzML cacheFile(std::string & tmp_filename, PeakMap& exp)
{
  NEW_TMP_FILE(tmp_filename);

  // Load experiment
  MzMLFile().load(OPENMS_GET_TEST_DATA_PATH("MzMLFile_1.mzML"), exp);
  // the test file is expected to contain both spectra and chromatograms
  TEST_EQUAL(exp.getNrSpectra() > 0, true)
  TEST_EQUAL(exp.getNrChromatograms() > 0, true)

  // Cache the experiment to a temporary file
  CachedmzML cache;
  cache.writeMemdump(exp, tmp_filename);
  // Create the index from the given file
  cache.createMemdumpIndex(tmp_filename);
  return cache;
}
Esempio n. 11
0
  bool IDEvaluationBase::addSearchFile(const String& file_name)
  {
    MSSpectrum<> points;
    if (!loadCurve(file_name, points)) return false;

    data_.addSpectrum(points);

    PeakMap* exp = new PeakMap();
    exp->addSpectrum(points);
    spec_1d_->canvas()->addLayer(SpectrumCanvas::ExperimentSharedPtrType(exp));
    spec_1d_->canvas()->setLayerName(spec_1d_->canvas()->getLayerCount() - 1, points.getMetaValue("search_engine"));
    // set intensity mode (after spectrum has been added!)
    setIntensityMode((int) SpectrumCanvas::IM_SNAP);

    return true;
  }
  // version for label-free linkers
  //
  // Writes a spec.xml file to out_file. For every non-empty entry of
  // all_top_csms whose first match has a scan_index_light inside spectra, four
  // <spectrum> elements (light, heavy, common, xlinker) are written; all four
  // are encoded from the same spectrum spectra[scan_index_light].
  //
  // @param out_file     path of the file to write (truncated if it exists)
  // @param base_name    prefix of the spectrum file names written into the XML
  // @param all_top_csms cross-link spectrum matches, grouped per spectrum
  // @param spectra      spectra that are looked up by scan_index_light
  //
  // NOTE(review): the stream state is never checked, so a file that cannot be
  // opened is not reported.
  void XQuestResultXMLFile::writeXQuestXMLSpec(String out_file, String base_name, const std::vector< std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > >& all_top_csms, const PeakMap& spectra)
  {
    // String spec_xml_filename = base_name + "_matched.spec.xml";
    // XML Header
    std::ofstream spec_xml_file;
    std::cout << "Writing spec.xml to " << out_file << std::endl;
    spec_xml_file.open(out_file.c_str(), std::ios::trunc); // ios::app = append to file, ios::trunc = overwrites file
    // TODO write actual data
    // NOTE(review): date, author, resultdir and deffile below are fixed literal values
    spec_xml_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?><xquest_spectra compare_peaks_version=\"3.4\" date=\"Tue Nov 24 12:41:18 2015\" author=\"Thomas Walzthoeni,Oliver Rinner\" homepage=\"http://proteomics.ethz.ch\" resultdir=\"aleitner_M1012_004_matched\" deffile=\"xquest.def\" >" << std::endl;

    // collect indices of spectra, that need to be written out
    std::vector <Size> spectrum_indices;

    for (Size i = 0; i < all_top_csms.size(); ++i)
    {
      if (!all_top_csms[i].empty())
      {
        // only the first match of each group decides which spectrum is written;
        // indices outside of the spectra container are ignored
        if (all_top_csms[i][0].scan_index_light < spectra.size())
        {
          spectrum_indices.push_back(all_top_csms[i][0].scan_index_light);
        }
      }
    }

    // loop over list of indices and write out spectra
    for (Size i = 0; i < spectrum_indices.size(); ++i)
    {
      // light and heavy names differ only in the label; both use the same index
      String spectrum_light_name = base_name + ".light." + spectrum_indices[i];
      String spectrum_heavy_name = base_name + ".heavy." + spectrum_indices[i];

      String spectrum_name = spectrum_light_name + String("_") + spectrum_heavy_name;

      // 4 Spectra resulting from a light/heavy spectra pair.  Write for each spectrum, that is written to xquest.xml (should be all considered pairs, or better only those with at least one sensible Hit, meaning a score was computed)
      spec_xml_file << "<spectrum filename=\"" << spectrum_light_name << ".dta" << "\" type=\"light\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], String(""));
      spec_xml_file << "</spectrum>" << std::endl;

      // the "heavy" entry is encoded from the same spectrum as the light one
      spec_xml_file << "<spectrum filename=\"" << spectrum_heavy_name << ".dta" << "\" type=\"heavy\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], String(""));
      spec_xml_file << "</spectrum>" << std::endl;

      // "common" and "xlinker" entries pass both .dta names as second argument
      String spectrum_common_name = spectrum_name + String("_common.txt");
      spec_xml_file << "<spectrum filename=\"" << spectrum_common_name << "\" type=\"common\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
      spec_xml_file << "</spectrum>" << std::endl;

      String spectrum_xlink_name = spectrum_name + String("_xlinker.txt");
      spec_xml_file << "<spectrum filename=\"" << spectrum_xlink_name << "\" type=\"xlinker\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
      spec_xml_file << "</spectrum>" << std::endl;
    }

    // close the root element opened in the header
    spec_xml_file << "</xquest_spectra>" << std::endl;
    spec_xml_file.close();

    return;
  }
Esempio n. 13
0
 // Computes, for every spectrum of peak_map, the result of
 // calculateIntensityRankInMZWindow() on the spectrum's m/z and intensity
 // values with the given mz_window. The i-th entry of the returned vector
 // belongs to the i-th spectrum.
 vector<vector<Size> > PScore::calculateRankMap(const PeakMap& peak_map, double mz_window)
 {
   const Size n_spectra = peak_map.size();

   vector<std::vector<Size> > ranks_per_spectrum; // note: ranks are zero based
   ranks_per_spectrum.reserve(n_spectra);

   for (Size s = 0; s < n_spectra; ++s)
   {
     const PeakSpectrum& spectrum = peak_map[s];

     // split the peaks into parallel m/z and intensity arrays
     vector<double> mz_values;
     vector<double> intensity_values;
     for (PeakSpectrum::ConstIterator peak = spectrum.begin(); peak != spectrum.end(); ++peak)
     {
       mz_values.push_back(peak->getMZ());
       intensity_values.push_back(peak->getIntensity());
     }

     ranks_per_spectrum.push_back(calculateIntensityRankInMZWindow(mz_values, intensity_values, mz_window));
   }
   return ranks_per_spectrum;
 }
// Tutorial program: loads "<argv[1]>/data/Tutorial_Spectrum1D.dta" and shows
// it in a Spectrum1DWidget. Returns 1 if no data path was given, otherwise
// the result of the Qt event loop.
Int main(int argc, const char ** argv)
{
  // the path to the data should be given on the command line
  if (argc < 2)
  {
    return 1;
  }
  String data_root(argv[1]);

  QApplication app(argc, const_cast<char **>(argv));

  // a map holding exactly one spectrum, which is filled from the DTA file
  PeakMap single_spectrum_map;
  single_spectrum_map.resize(1);
  DTAFile().load(data_root + "/data/Tutorial_Spectrum1D.dta", single_spectrum_map[0]);

  // the canvas receives a shared copy of the map as its layer
  LayerData::ExperimentSharedPtrType layer_data(new PeakMap(single_spectrum_map));
  Spectrum1DWidget * viewer = new Spectrum1DWidget(Param(), 0);
  viewer->canvas()->addLayer(layer_data);
  viewer->show();

  return app.exec();
} //end of main
 void getPrecursors_(const PeakMap & exp, vector<Precursor> & precursors, vector<double> & precursors_rt)
 {
   for (Size i = 0; i != exp.size(); ++i)
   {
     vector<Precursor> pcs = exp[i].getPrecursors();
     if (pcs.empty())
     {
       continue;
     }
     vector<double> pcs_rt(pcs.size(), exp[i].getRT());
     copy(pcs.begin(), pcs.end(), back_inserter(precursors));
     copy(pcs_rt.begin(), pcs_rt.end(), back_inserter(precursors_rt));
   }
 }
// Fills exp with synthetic data: optionally one MS1 spectrum holding a single
// peak (m/z 100, intensity 200), followed by nr_swathes MS2 spectra. MS2
// spectrum i has one precursor at m/z 400 + i*25 + 12.5 with isolation window
// offsets of 12.5 on both sides, and a single peak (m/z 101 + i, intensity 201 + i).
void getSwathFile(PeakMap& exp, int nr_swathes=32, bool ms1=true)
{
  if (ms1)
  {
    Peak1D ms1_peak;
    ms1_peak.setMZ(100);
    ms1_peak.setIntensity(200);

    MSSpectrum ms1_spectrum;
    ms1_spectrum.setMSLevel(1);
    ms1_spectrum.push_back(ms1_peak);
    exp.addSpectrum(ms1_spectrum);
  }

  for (int swath = 0; swath < nr_swathes; ++swath)
  {
    Precursor window;
    window.setIsolationWindowLowerOffset(12.5);
    window.setIsolationWindowUpperOffset(12.5);
    window.setMZ(400 + swath*25 + 12.5);

    Peak1D fragment_peak;
    fragment_peak.setMZ(101 + swath);
    fragment_peak.setIntensity(201 + swath);

    MSSpectrum swath_spectrum;
    swath_spectrum.setMSLevel(2);
    swath_spectrum.setPrecursors(std::vector<Precursor>(1, window));
    swath_spectrum.push_back(fragment_peak);
    exp.addSpectrum(swath_spectrum);
  }
}
  // Tool entry point of the X!Tandem adapter.
  //
  // Steps, in order:
  //  1. read and validate the options ("in", "out", charge range, "database"),
  //  2. load the MS2 spectra of the input mzML file and renumber their native IDs,
  //  3. write the spectra (mzData), a taxonomy file and the X!Tandem input file
  //     into a fresh temporary directory,
  //  4. run the X!Tandem executable on the input file,
  //  5. read the X!Tandem output, copy RT and precursor m/z from the spectra to
  //     the peptide identifications and store everything as idXML,
  //  6. remove the temporary directory unless debug_level_ is >= 2.
  //
  // Returns ILLEGAL_PARAMETERS for missing/invalid options, EXTERNAL_PROGRAM_ERROR
  // if the X!Tandem call returns a non-zero status, EXECUTION_OK otherwise.
  // Throws FileEmpty if no spectra were loaded, IllegalArgument for profile data
  // without the "force" flag, and FileNotFound if not exactly one output file is found.
  ExitCodes main_(int, const char**)
  {
    // instance specific location of settings in INI file (e.g. 'TOPP_Skeleton:1:')
    String ini_location;
    // path to the log file
    String logfile(getStringOption_("log"));
    String xtandem_executable(getStringOption_("xtandem_executable"));
    String inputfile_name;
    String outputfile_name;

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    inputfile_name = getStringOption_("in");
    writeDebug_(String("Input file: ") + inputfile_name, 1);
    if (inputfile_name == "")
    {
      writeLog_("No input file specified. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    outputfile_name = getStringOption_("out");
    writeDebug_(String("Output file: ") + outputfile_name, 1);
    if (outputfile_name == "")
    {
      writeLog_("No output file specified. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    // write input xml file
    String temp_directory = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files
    {
      // create the (unique) temporary directory; the QDir object is only needed for this call
      QDir d;
      d.mkpath(temp_directory.toQString());
    }

    // all intermediate files live inside the temporary directory
    String input_filename(temp_directory + "_tandem_input_file.xml");
    String tandem_input_filename(temp_directory + "_tandem_input_file.mzData");
    String tandem_output_filename(temp_directory + "_tandem_output_file.xml");
    String tandem_taxonomy_filename(temp_directory + "_tandem_taxonomy_file.xml");

    //-------------------------------------------------------------
    // Validate user parameters
    //-------------------------------------------------------------
    if (getIntOption_("min_precursor_charge") > getIntOption_("max_precursor_charge"))
    {
      LOG_ERROR << "Given charge range is invalid: max_precursor_charge needs to be >= min_precursor_charge." << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    // if the database is not readable as given, try to locate it via File::findDatabase()
    String db_name(getStringOption_("database"));
    if (!File::readable(db_name))
    {
      String full_db_name;
      try
      {
        full_db_name = File::findDatabase(db_name);
      }
      catch (...)
      {
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
      db_name = full_db_name;
    }


    PeakMap exp;
    MzMLFile mzml_file;
    mzml_file.getOptions().addMSLevel(2); // only load msLevel 2
    mzml_file.setLogType(log_type_);
    mzml_file.load(inputfile_name, exp);

    if (exp.getSpectra().empty())
    {
      throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file.");
    }

    // determine type of spectral data (profile or centroided)
    // (only the first spectrum is inspected)
    SpectrumSettings::SpectrumType spectrum_type = exp[0].getType();

    if (spectrum_type == SpectrumSettings::RAWDATA)
    {
      if (!getFlag_("force"))
      {
        throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag.");
      }
    }

    // we need to replace the native id with a simple numbering schema, to be able to
    // map the IDs back to the spectra (RT, and MZ information)
    // (IDs are 1-based: the first spectrum gets ID 1)
    Size native_id(0);
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      it->setNativeID(++native_id);
    }

    // We store the file in mzData file format, because MGF files somehow produce in most
    // of the cases IDs with charge 2+. We do not use the input file directly
    // because XTandem sometimes stumbles over misleading substrings in the filename,
    // e.g. mzXML ...
    MzDataFile mzdata_outfile;
    mzdata_outfile.store(tandem_input_filename, exp);

    XTandemInfile infile;
    infile.setInputFilename(tandem_input_filename);
    infile.setOutputFilename(tandem_output_filename);

    // write a minimal taxonomy file that maps the dummy taxon to the database file
    ofstream tax_out(tandem_taxonomy_filename.c_str());
    tax_out << "<?xml version=\"1.0\"?>" << "\n";
    tax_out << "\t<bioml label=\"x! taxon-to-file matching list\">" << "\n";
    tax_out << "\t\t<taxon label=\"OpenMS_dummy_taxonomy\">" << "\n";
    tax_out << "\t\t\t<file format=\"peptide\" URL=\"" << db_name << "\" />" << "\n";
    tax_out << "\t</taxon>" << "\n";
    tax_out << "</bioml>" << "\n";
    tax_out.close();

    infile.setTaxonomyFilename(tandem_taxonomy_filename);

    // any unit other than "Da" is treated as ppm
    if (getStringOption_("precursor_error_units") == "Da")
    {
      infile.setPrecursorMassErrorUnit(XTandemInfile::DALTONS);
    }
    else
    {
      infile.setPrecursorMassErrorUnit(XTandemInfile::PPM);
    }

    if (getStringOption_("fragment_error_units") == "Da")
    {
      infile.setFragmentMassErrorUnit(XTandemInfile::DALTONS);
    }
    else
    {
      infile.setFragmentMassErrorUnit(XTandemInfile::PPM);
    }

    // use the user supplied default parameter file, or fall back to the shipped one
    if (getStringOption_("default_input_file") != "")
    {
      infile.load(getStringOption_("default_input_file"));
      infile.setDefaultParametersFilename(getStringOption_("default_input_file"));
    }
    else
    {
      String default_file = File::find("CHEMISTRY/XTandem_default_input.xml");
      infile.load(default_file);
      infile.setDefaultParametersFilename(default_file);
    }

    // transfer the remaining tool options to the X!Tandem input file
    // (the same precursor tolerance is used for the plus and the minus side)
    infile.setPrecursorMassTolerancePlus(getDoubleOption_("precursor_mass_tolerance"));
    infile.setPrecursorMassToleranceMinus(getDoubleOption_("precursor_mass_tolerance"));
    infile.setFragmentMassTolerance(getDoubleOption_("fragment_mass_tolerance"));
    infile.setMaxPrecursorCharge(getIntOption_("max_precursor_charge"));
    infile.setNumberOfThreads(getIntOption_("threads"));
    infile.setModifications(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications")));
    infile.setTaxon("OpenMS_dummy_taxonomy");
    infile.setOutputResults(getStringOption_("output_results"));
    infile.setMaxValidEValue(getDoubleOption_("max_valid_expect"));
    infile.setCleavageSite(getStringOption_("cleavage_site"));
    infile.setNumberOfMissedCleavages(getIntOption_("missed_cleavages"));
    infile.setRefine(getFlag_("refinement"));
    infile.setSemiCleavage(getFlag_("semi_cleavage"));
    bool allow_isotope_error = getStringOption_("allow_isotope_error") == "yes" ? true : false;
    infile.setAllowIsotopeError(allow_isotope_error);

    infile.write(input_filename);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    int status = QProcess::execute(xtandem_executable.toQString(), QStringList(input_filename.toQString())); // does automatic escaping etc...
    if (status != 0)
    {
      writeLog_("XTandem problem. Aborting! Calling command was: '" + xtandem_executable + " \"" + input_filename + "\"'.\nDoes the !XTandem executable exist?");
      // clean temporary files
      if (this->debug_level_ < 2)
      {
        File::removeDirRecursively(temp_directory);
        LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl;
      }
      else
      {
        LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl;
      }
      return EXTERNAL_PROGRAM_ERROR;
    }

    vector<ProteinIdentification> protein_ids;
    ProteinIdentification protein_id;
    vector<PeptideIdentification> peptide_ids;

    // read the output of X!Tandem and write it to idXML
    XTandemXMLFile tandem_output;
    tandem_output.setModificationDefinitionsSet(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications")));
    // find the file, because XTandem extends the filename with a timestamp we do not know (exactly)
    StringList files;
    File::fileList(temp_directory, "_tandem_output_file*.xml", files);
    // NOTE(review): this throw leaves the temporary directory in place
    if (files.size() != 1)
    {
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, tandem_output_filename);
    }
    tandem_output.load(temp_directory + files[0], protein_id, peptide_ids);

    // now put the RTs into the peptide_ids from the spectrum ids
    for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it != peptide_ids.end(); ++it)
    {
      UInt id = (Int)it->getMetaValue("spectrum_id");
      --id; // native IDs were written 1-based
      // an ID of 0 wraps around to a large unsigned value and ends up in the error branch below
      if (id < exp.size())
      {
        it->setRT(exp[id].getRT());
        // spectra without precursor get an m/z of 0
        double pre_mz(0.0);
        if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
        it->setMZ(pre_mz);
        //it->removeMetaValue("spectrum_id");
      }
      else
      {
        // note: the reported id is the already decremented (0-based) value
        LOG_ERROR << "XTandemAdapter: Error: id '" << id << "' not found in peak map!" << endl;
      }
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // handle the search parameters
    ProteinIdentification::SearchParameters search_parameters;
    search_parameters.db = getStringOption_("database");
    search_parameters.charges = "+" + String(getIntOption_("min_precursor_charge")) + "-+" + String(getIntOption_("max_precursor_charge"));

    ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC;
    search_parameters.mass_type = mass_type;
    search_parameters.fixed_modifications = getStringList_("fixed_modifications");
    search_parameters.variable_modifications = getStringList_("variable_modifications");
    search_parameters.missed_cleavages = getIntOption_("missed_cleavages");
    search_parameters.peak_mass_tolerance = getDoubleOption_("fragment_mass_tolerance");
    search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance");

    protein_id.setSearchParameters(search_parameters);
    protein_id.setSearchEngineVersion("");
    protein_id.setSearchEngine("XTandem");

    protein_ids.push_back(protein_id);

    IdXMLFile().store(outputfile_name, protein_ids, peptide_ids);

    /// Deletion of temporary files
    if (this->debug_level_ < 2)
    {
      File::removeDirRecursively(temp_directory);
      LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl;
    }
    else
    {
      LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl;
    }

    // some stats
    // (exp is not empty here, see the FileEmpty check above, so the division is safe)
    LOG_INFO << "Statistics:\n"
             << "  identified MS2 spectra: " << peptide_ids.size() << " / " << exp.size() << " = " << int(peptide_ids.size() * 100.0 / exp.size()) << "% (with e-value < " << String(getDoubleOption_("max_valid_expect")) << ")" << std::endl;

    return EXECUTION_OK;
  }
  // Tool entry point: corrects precursor m/z and charge of MS2 spectra.
  //
  // For every block "MS1 scan followed by MS2 scans", a +/- 3 m/z window around
  // the first precursor of each MS2 scan is cut out of the MS1 scan and handed to
  // the "isotope_wavelet" feature finder. Among the features found within
  // precursor_mass_tolerance of the precursor m/z, the most intense one replaces
  // the precursor m/z and charge of the MS2 scan.
  //
  // Options read: "in", "feature_in", "out", "precursor_mass_tolerance",
  // "max_charge", "intensity_threshold".
  // Returns EXECUTION_OK after the (possibly modified) experiment was stored to "out".
  ExitCodes main_(int, const char**)
  {
    // parsing parameters
    String in(getStringOption_("in"));
    String feature_in(getStringOption_("feature_in"));
    String out(getStringOption_("out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

    // reading input
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    PeakMap exp;
    fh.loadExperiment(in, exp, in_type, log_type_, false, false);
    exp.sortSpectra();

    // the feature map is loaded if given, but not used by the correction below
    FeatureMap feature_map;
    if (feature_in != "")
    {
      FeatureXMLFile().load(feature_in, feature_map);
    }

    // calculations
    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", getIntOption_("max_charge"));
    ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    // drop spectra without peaks; exp2 starts as a copy of exp with its spectra cleared
    PeakMap exp2 = exp;
    exp2.clear(false);
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->size() != 0)
      {
        exp2.addSpectrum(*it);
      }
    }

    exp = exp2;
    exp.updateRanges();

    // TODO check MS2 and MS1 counts
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);
    progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      // progress counts upwards: number of spectra already passed
      progresslogger.setProgress(it - exp.begin());
      if (it->getMSLevel() != 2)
      {
        continue;
      }
      // find first MS1 scan of the MS/MS scan
      PeakMap::Iterator ms1_it = it;
      while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
      {
        --ms1_it;
      }
      if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
      {
        writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
        continue;
      }
      if (ms1_it->size() == 0)
      {
        writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
        continue;
      }

      // handle all MS2 scans that directly follow the MS1 scan in one go
      PeakMap::Iterator ms2_it = ms1_it;
      ++ms2_it;

      while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
      {
        // first: error checks
        if (ms2_it->getPrecursors().empty())
        {
          writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
          ++ms2_it;
          continue;
        }
        else if (ms2_it->getPrecursors().size() > 1)
        {
          writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
        }

        Precursor prec = *ms2_it->getPrecursors().begin();
        double prec_pos = prec.getMZ();

        PeakMap new_exp;
        // now excise small region from the MS1 spec for the feature finder (isotope pattern must be covered...)
        PeakSpectrum zoom_spec;
        for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
        {
          if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3)
          {
            zoom_spec.push_back(*pit);
          }
        }
        new_exp.addSpectrum(zoom_spec);
        new_exp.updateRanges();
        FeatureMap features, seeds;
        ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
        if (features.empty())
        {
          writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
          ++ms2_it;
          continue;
        }

        // Start below every possible intensity. numeric_limits<double>::min() is the
        // smallest POSITIVE double and would exclude features with intensity 0.
        double max_int(-numeric_limits<double>::max());
        // stays at max() as long as no feature within the tolerance was found
        double min_dist(numeric_limits<double>::max());
        Size max_int_feat_idx(0);

        // pick the most intense feature within the m/z tolerance of the precursor
        for (Size i = 0; i != features.size(); ++i)
        {
          if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance &&
              features[i].getIntensity() > max_int)
          {
            max_int_feat_idx = i;
            max_int = features[i].getIntensity();
            min_dist = fabs(features[i].getMZ() - prec_pos);
          }
        }


        writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
        if (min_dist < precursor_mass_tolerance)
        {
          // replace all precursors of the scan by the corrected first precursor
          prec.setMZ(features[max_int_feat_idx].getMZ());
          prec.setCharge(features[max_int_feat_idx].getCharge());
          vector<Precursor> precs;
          precs.push_back(prec);
          ms2_it->setPrecursors(precs);
          writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
        }

        ++ms2_it;
      }

      // Continue behind the last MS2 scan handled above, but never step backwards:
      // if a scan that is neither MS1 nor MS2 separates the MS1 scan from 'it', the
      // inner loop stops before 'it', and moving 'it' back would repeat forever.
      --ms2_it;
      if (it < ms2_it)
      {
        it = ms2_it;
      }
    }
    progresslogger.endProgress();

    // writing output
    fh.storeExperiment(out, exp, log_type_);

    return EXECUTION_OK;
  }
Esempio n. 19
0
START_SECTION((static FileTypes::Type getType(const String &filename)))
// getType() must report the expected file type for each of the test data files
FileHandler tmp;
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("header_file.h")), FileTypes::UNKNOWN)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("class_test_infile.txt")), FileTypes::TXT)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("IdXMLFile_whole.idXML")), FileTypes::IDXML)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("ConsensusXMLFile.consensusXML")), FileTypes::CONSENSUSXML)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("TransformationXMLFile_1.trafoXML")), FileTypes::TRANSFORMATIONXML)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("FileHandler_toppas.toppas")), FileTypes::TOPPAS)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("pepnovo.txt")), FileTypes::TXT)

// a path that does not exist must raise FileNotFound
TEST_EXCEPTION(Exception::FileNotFound, tmp.getType("/bli/bla/bluff"))
END_SECTION

START_SECTION((template < class PeakType > bool loadExperiment(const String &filename, MSExperiment< PeakType > &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool compute_hash=true)))
FileHandler tmp;
PeakMap exp;
// loading "test.bla" is expected to fail, loading the DTA test file to succeed
TEST_EQUAL(tmp.loadExperiment("test.bla", exp), false)
TEST_EQUAL(tmp.loadExperiment(OPENMS_GET_TEST_DATA_PATH("DTAFile_test.dta"), exp), true)

// without an m/z restriction the second spectrum starts with the positions 110, 120, 130
TEST_EQUAL(tmp.loadExperiment(OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"), exp), true)
TEST_REAL_SIMILAR(exp[1][0].getPosition()[0], 110)
TEST_REAL_SIMILAR(exp[1][1].getPosition()[0], 120)
TEST_REAL_SIMILAR(exp[1][2].getPosition()[0], 130)

// starts with 110, so this one should skip the first
tmp.getOptions().setMZRange(DRange<1>(115, 1000));
TEST_EQUAL(tmp.loadExperiment(OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"), exp), true)
TEST_REAL_SIMILAR(exp[1][0].getPosition()[0], 120)
TEST_REAL_SIMILAR(exp[1][1].getPosition()[0], 130)

// restore default options so that the m/z restriction does not affect later loads
tmp.getOptions() = PeakFileOptions();
    // Corrects MS2 precursors whose mono-isotopic mass is assumed to be assigned wrongly, using features.
    // A wrong assignment is assumed:
    // - if precursor_mz matches the mz of a non-monoisotopic feature mass trace
    // - and, if believe_charge is true, if feature_charge matches the precursor_charge
    // Available correction modes:
    // - keep_original: a copy of the tandem spectrum with the corrected precursor is appended to exp,
    //   the original spectrum is left untouched
    // - all_matching_features: use all matching features instead of only the one closest in RT
    // Returns the scan indices (positions in exp) of all precursors for which a feature was found.
    set<Size> correctToNearestFeature(const FeatureMap& features, PeakMap & exp, double rt_tolerance_s = 0.0, double mz_tolerance = 0.0, bool ppm = true, bool believe_charge = false, bool keep_original = false, bool all_matching_features = false, int max_trace = 2)
    {
      typedef set<Size> FeatureIndexSet;
      typedef map<Size, FeatureIndexSet> ScanToFeatures;

      set<Size> corrected_precursors;
      ScanToFeatures candidates; // scan index -> indices of candidate features

      // Step 1: collect, per tandem spectrum, every feature that overlaps the precursor position
      // (optionally restricted to features with the precursor's charge)
      for (Size scan_idx = 0; scan_idx < exp.size(); ++scan_idx)
      {
        // only tandem spectra that carry at least one precursor are considered
        if (!(exp[scan_idx].getMSLevel() == 2 && !exp[scan_idx].getPrecursors().empty()))
        {
          continue;
        }

        const double precursor_mz = exp[scan_idx].getPrecursors()[0].getMZ();
        const double scan_rt = exp[scan_idx].getRT();
        const int precursor_charge = exp[scan_idx].getPrecursors()[0].getCharge();

        for (Size feature_idx = 0; feature_idx < features.size(); ++feature_idx)
        {
          const bool charge_ok = !believe_charge || features[feature_idx].getCharge() == precursor_charge;
          if (charge_ok && overlaps_(features[feature_idx], scan_rt, precursor_mz, rt_tolerance_s))
          {
            candidates[scan_idx].insert(feature_idx);
          }
        }
      }

      // Step 2: drop every candidate that compatible_() rejects for the precursor m/z
      for (ScanToFeatures::iterator entry = candidates.begin(); entry != candidates.end(); ++entry)
      {
        const double precursor_mz = exp[entry->first].getPrecursors()[0].getMZ();

        // tolerance in Dalton (converted from ppm relative to the precursor m/z if requested)
        double tolerance_da = mz_tolerance;
        if (ppm)
        {
          tolerance_da = precursor_mz * mz_tolerance * 1e-6;
        }

        FeatureIndexSet& feature_ids = entry->second;
        FeatureIndexSet::iterator cursor = feature_ids.begin();
        while (cursor != feature_ids.end())
        {
          // step past the element before it is possibly erased, so the cursor stays valid
          FeatureIndexSet::iterator current = cursor;
          ++cursor;
          if (!compatible_(features[*current], precursor_mz, tolerance_da, max_trace))
          {
            feature_ids.erase(current);
          }
        }
      }

      // Step 3: forget all scans that are left without any compatible feature
      {
        ScanToFeatures::iterator cursor = candidates.begin();
        while (cursor != candidates.end())
        {
          ScanToFeatures::iterator current = cursor;
          ++cursor;
          if (current->second.empty())
          {
            candidates.erase(current);
          }
        }
      }

      if (debug_level_ > 0)
      {
        LOG_INFO << "Number of precursors with compatible features: " << candidates.size() << endl;
      }

      // Step 4: unless all matching features are requested, reduce each set to the feature nearest in RT
      if (!all_matching_features)
      {
        for (ScanToFeatures::iterator entry = candidates.begin(); entry != candidates.end(); ++entry)
        {
          const double precursor_rt = exp[entry->first].getRT();
          FeatureIndexSet& feature_ids = entry->second;

          // sets are non-empty here (empty ones were removed above), so begin() is a valid default
          FeatureIndexSet::iterator nearest = feature_ids.begin();
          double nearest_distance = 1e16;
          for (FeatureIndexSet::iterator fit = feature_ids.begin(); fit != feature_ids.end(); ++fit)
          {
            const double distance = fabs(precursor_rt - features[*fit].getRT());
            if (distance < nearest_distance)
            {
              nearest_distance = distance;
              nearest = fit;
            }
          }

          // replace the set content by the single nearest feature
          const Size nearest_feature_idx = *nearest;
          feature_ids.clear();
          feature_ids.insert(nearest_feature_idx);
        }
      }

      // Step 5: write the feature m/z and charge into the precursor information.
      // Depending on keep_original either corrected copies are appended or the spectrum is modified in place.
      for (ScanToFeatures::iterator entry = candidates.begin(); entry != candidates.end(); ++entry)
      {
        const Size scan_idx = entry->first;
        const FeatureIndexSet& feature_ids = entry->second;
        corrected_precursors.insert(scan_idx);

        if (keep_original)
        {
          // one appended copy per remaining feature; the spectrum at scan_idx itself is not changed
          MSSpectrum<> corrected_copy = exp[scan_idx];
          for (FeatureIndexSet::const_iterator fit = feature_ids.begin(); fit != feature_ids.end(); ++fit)
          {
            corrected_copy.getPrecursors()[0].setMZ(features[*fit].getMZ());
            corrected_copy.getPrecursors()[0].setCharge(features[*fit].getCharge());
            exp.addSpectrum(corrected_copy);
          }
        }
        else
        {
          // in-place correction uses the first feature of the set
          const Size first_feature_idx = *feature_ids.begin();
          exp[scan_idx].getPrecursors()[0].setMZ(features[first_feature_idx].getMZ());
          exp[scan_idx].getPrecursors()[0].setCharge(features[first_feature_idx].getCharge());
        }
      }

      return corrected_precursors;
    }
    // Moves each precursor m/z onto the nearest peak of its parent MS1 spectrum.
    //
    // For every precursor reported by getPrecursors_() the spectrum at the precursor RT is looked up,
    // its parent spectrum is fetched via getPrecursorSpectrum(), and the peak nearest to the precursor
    // m/z is determined. If the deviation is below mz_tolerance (in ppm if ppm is true, otherwise in
    // the unit of m/z) the precursor m/z stored in exp is replaced by the peak m/z.
    //
    // @param exp          experiment that is searched and corrected in place
    // @param mz_tolerance maximal allowed deviation between precursor and nearest peak
    // @param ppm          interpret mz_tolerance as ppm
    // @param deltaMZs     receives (appended) nearest_peak_mz - precursor_mz for each corrected precursor
    // @param mzs          receives (appended) the original precursor m/z for each corrected precursor
    // @param rts          receives (appended) the precursor RT for each corrected precursor
    // @return indices (positions in exp) of the spectra whose precursor was corrected
    //
    // Precursors for which no usable spectrum is found are skipped with a warning.
    set<Size> correctToNearestMS1Peak(PeakMap & exp, double mz_tolerance, bool ppm, vector<double> & deltaMZs, vector<double> & mzs, vector<double> & rts)
    {
      set<Size> corrected_precursors;
      // load experiment and extract precursors
      vector<Precursor> precursors;  // precursor
      vector<double> precursors_rt;  // RT of precursor MS2 spectrum
      vector<Size> precursor_scan_index;
      getPrecursors_(exp, precursors, precursors_rt, precursor_scan_index);

      // read-only view, so that all iterators used for lookups are const iterators of the same kind
      const PeakMap& const_exp = exp;

      for (Size i = 0; i != precursors_rt.size(); ++i)
      {
        // get precursor rt
        const double rt = precursors_rt[i];

        // get precursor MZ
        const double mz = precursors[i].getMZ();

        // get precursor spectrum (small offset to be robust against rounding of the RT)
        MSExperiment<Peak1D>::ConstIterator rt_it = const_exp.RTBegin(rt - 1e-8);
        if (rt_it == const_exp.end())
        {
          // nothing at or after this RT: the iterator must not be used any further
          LOG_WARN << "Error: no spectrum found at the retention time of this precursor" << endl;
          continue;
        }

        // store index of MS2 spectrum
        const Size precursor_spectrum_idx = rt_it - const_exp.begin();

        // the spectrum found by RT has to carry a precursor, otherwise there is nothing to correct
        if (const_exp[precursor_spectrum_idx].getPrecursors().empty())
        {
          LOG_WARN << "Error: index is referencing different precursors in original and picked spectrum." << endl;
          continue;
        }

        // get parent (MS1) of precursor spectrum
        rt_it = const_exp.getPrecursorSpectrum(rt_it);

        // a missing parent is signalled by the past-the-end iterator, which must not be dereferenced
        if (rt_it == const_exp.end() || rt_it->getMSLevel() != 1)
        {
          LOG_WARN << "Error: no MS1 spectrum for this precursor" << endl;
          continue;
        }

        // an empty parent spectrum has no nearest peak
        if (rt_it->empty())
        {
          continue;
        }

        // find peak (index) closest to expected position
        const Size nearest_peak_idx = rt_it->findNearest(mz);

        // get actual position of closest peak
        const double nearest_peak_mz = (*rt_it)[nearest_peak_idx].getMZ();

        // calculate error between expected and actual position
        // (fabs, so that the floating point overload is used in any case)
        const double abs_error = fabs(nearest_peak_mz - mz);
        const double nearestPeakError = ppm ? abs_error / mz * 1e6 : abs_error;

        // check if error is small enough
        if (nearestPeakError < mz_tolerance)
        {
          // sanity check: do we really have the same precursor in the original and the picked spectrum
          if (fabs(exp[precursor_spectrum_idx].getPrecursors()[0].getMZ() - mz) > 0.0001)
          {
            LOG_WARN << "Error: index is referencing different precursors in original and picked spectrum." << endl;
          }

          const double deltaMZ = nearest_peak_mz - mz;
          deltaMZs.push_back(deltaMZ);
          mzs.push_back(mz);
          rts.push_back(rt);

          // correct entries
          Precursor corrected_prec = precursors[i];
          corrected_prec.setMZ(nearest_peak_mz);
          exp[precursor_spectrum_idx].getPrecursors()[0] = corrected_prec;
          corrected_precursors.insert(precursor_spectrum_idx);
        }
      }
      return corrected_precursors;
    }
Esempio n. 22
0
  void filterByFoldChange(const PeakMap& exp1, const PeakMap& exp2,
                          const vector<double>& pc_ms2_rts, const vector<double>& pc_mzs,
                          const double rttol, const double mztol, double fold_change,
                          vector<double>& control_XIC_larger,
                          vector<double>& treatment_XIC_larger,
                          vector<double>& indifferent_XICs)
  {
    assert(pc_mzs.size() == pc_ms2_rts.size());

    // search for each EIC and add up
    for (Size i = 0; i < pc_mzs.size(); ++i)
    {
      //cerr << "start" << endl;
      double pc_ms2_rt = pc_ms2_rts[i];
      double pc_mz = pc_mzs[i];

      //std::cerr << "Rt" << cm[i].getRT() << "  mz: " << cm[i].getMZ() << " R " <<  cm[i].getMetaValue("rank") << "\n";

      double mz_da = mztol * pc_mzs[i] / 1e6; // mz tolerance in Dalton
      double rt_start = pc_ms2_rts[i] - rttol / 2.0;

      // get area iterator (is MS1 only!) for rt and mz window
      PeakMap::ConstAreaIterator it1 = exp1.areaBeginConst(pc_ms2_rt - rttol / 2, pc_ms2_rt + rttol / 2, pc_mz - mz_da, pc_mz  + mz_da);
      PeakMap::ConstAreaIterator it2 = exp2.areaBeginConst(pc_ms2_rt - rttol / 2, pc_ms2_rt + rttol / 2, pc_mz - mz_da, pc_mz  + mz_da);

      // determine maximum number of MS1 scans in retention time window
      set<double> rts1;
      set<double> rts2;
      for (; it1 != exp1.areaEndConst(); ++it1)
      {
        rts1.insert(it1.getRT());
      }

      for (; it2 != exp2.areaEndConst(); ++it2)
      {
        rts2.insert(it2.getRT());
      }

      Size length = std::max(rts1.size(), rts2.size()) / 2.0;

      //cout << length << endl;
      if (length == 0)
      {
        cerr << "WARNING: no MS1 scans in retention time window found in both maps (mz: " << pc_mzs[i] << " / rt: " << pc_ms2_rts[i] << ")" << endl;
        continue;
      }

      vector<double> XIC1(length, 0.0);
      vector<double> XIC2(length, 0.0);

      it1 = exp1.areaBeginConst(pc_ms2_rt - rttol / 2, pc_ms2_rt + rttol / 2, pc_mz - mz_da, pc_mz + mz_da);
      it2 = exp2.areaBeginConst(pc_ms2_rt - rttol / 2, pc_ms2_rt + rttol / 2, pc_mz - mz_da, pc_mz + mz_da);

      for (; it1 != exp1.areaEndConst(); ++it1)
      {
        double relative_rt = (it1.getRT() - rt_start) / rttol;
        Size bin = relative_rt * (length - 1);
        XIC1[bin] += it1->getIntensity();
        if (bin >= length)
        {
          bin = length - 1;
        }

      }

      for (; it2 != exp2.areaEndConst(); ++it2)
      {
        double relative_rt = (it2.getRT() - rt_start) / rttol;
        Size bin = relative_rt * (length - 1);
        if (bin >= length)
        {
          bin = length - 1;
        }
        XIC2[bin] += it2->getIntensity();
      }

      double total_itensity1 = std::accumulate(XIC1.begin(), XIC1.end(), 0.0);
      double total_itensity2 = std::accumulate(XIC2.begin(), XIC2.end(), 0.0);

      double ratio = total_itensity2 / (total_itensity1 + 1);

      //cout << pc_ms2_rt << "/" << pc_mz << " has ratio: " << ratio << " determined on " << length << " bins" << endl;

      if (ratio < 1.0 / fold_change)
      {
        control_XIC_larger.push_back(pc_ms2_rt);
      }
      else if (ratio > fold_change)
      {
        treatment_XIC_larger.push_back(pc_ms2_rt);
      }
      else
      {
        indifferent_XICs.push_back(pc_ms2_rt);
        continue;
      }
      /*
      for (Size k = 0; k != length; ++k)
      {
        cout << k << ": " << rt_start + rttol / length * k  << ": " << XIC1[k] << " " << XIC2[k] << endl;
      }
      */
    }

    cout << "control larger: " << control_XIC_larger.size() << " treatment larger: " << treatment_XIC_larger.size() << " indifferent: " << indifferent_XICs.size() << endl;

    return;
  }
Esempio n. 23
0
  /// Tool entry point: loads the MS2 spectra of the input mzML file, runs the CompNovo CID
  /// identification on them and writes the resulting identifications as idXML.
  /// Returns EXECUTION_OK after the output has been stored.
  ExitCodes main_(int, const char **) override
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    String input_file(getStringOption_("in"));
    String output_file(getStringOption_("out"));

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------

    // only MS level 2 spectra are requested from the file
    PeakFileOptions ms2_only;
    ms2_only.clearMSLevels();
    ms2_only.addMSLevel(2);

    MzMLFile mzml;
    mzml.setLogType(log_type_);
    mzml.getOptions() = ms2_only;

    PeakMap exp;
    mzml.load(input_file, exp);

    writeDebug_("Data set contains " + String(exp.size()) + " spectra", 1);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    CompNovoIdentificationCID comp_novo_id;
    vector<PeptideIdentification> pep_ids;

    // hand the "algorithm:" section to the algorithm and read the parameters back after the run
    Param algorithm_param = getParam_().copy("algorithm:", true);
    comp_novo_id.setParameters(algorithm_param);
    comp_novo_id.getIdentifications(pep_ids, exp);
    algorithm_param = comp_novo_id.getParameters();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // run identifier built from the current date/time
    DateTime now = DateTime::now();
    String timestamp = now.get();
    String identifier("CompNovoCID_" + timestamp);

    for (Size i = 0; i < pep_ids.size(); ++i)
    {
      pep_ids[i].assignRanks();
      pep_ids[i].setIdentifier(identifier);
    }

    // search parameters stored alongside the identifications
    ProteinIdentification::SearchParameters search_parameters;
    search_parameters.charges = "+2-+3";
    search_parameters.mass_type = ProteinIdentification::MONOISOTOPIC;

    const String enzyme_name = algorithm_param.getValue("tryptic_only").toBool() ? "Trypsin" : "no cleavage";
    search_parameters.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme(enzyme_name));

    search_parameters.fixed_modifications = algorithm_param.getValue("fixed_modifications");
    search_parameters.variable_modifications = algorithm_param.getValue("variable_modifications");

    search_parameters.missed_cleavages = (UInt)algorithm_param.getValue("missed_cleavages");
    search_parameters.fragment_mass_tolerance = (double)algorithm_param.getValue("fragment_mass_tolerance");
    search_parameters.precursor_mass_tolerance = (double)algorithm_param.getValue("precursor_mass_tolerance");
    search_parameters.fragment_mass_tolerance_ppm = false;
    search_parameters.precursor_mass_tolerance_ppm = false;

    // a single protein identification run describes the search
    ProteinIdentification prot_id;
    prot_id.setIdentifier(identifier);
    prot_id.setDateTime(now);

    StringList ms_runs;
    exp.getPrimaryMSRunPath(ms_runs);
    prot_id.setPrimaryMSRunPath(ms_runs);

    prot_id.setSearchParameters(search_parameters);
    prot_id.setSearchEngineVersion("0.9beta");
    prot_id.setSearchEngine("CompNovo");

    vector<ProteinIdentification> prot_ids(1, prot_id);

    IdXMLFile().store(output_file, prot_ids, pep_ids);

    return EXECUTION_OK;
  }
  /// Tool entry point: quantifies SILAC pairs (light/heavy) in MS1 zoom scans.
  ///
  /// Reads the spectra ("in") and a pair file ("pair_in") whose non-comment lines are
  /// space separated 'm/z-light m/z-heavy charge rt'. For every zoom scan and every pair inside the
  /// RT tolerance, the m/z ranges of the light and heavy isotope pattern are cut out, padded with
  /// zero-intensity peaks and passed to the isotope wavelet feature finder. The best matching
  /// light/heavy feature pair is stored in a consensus map ("out"), all found features optionally in
  /// "feature_out", and a ratio summary per pair is printed to stdout.
  ///
  /// @return EXECUTION_OK after the output has been written
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in(getStringOption_("in"));
    String out(getStringOption_("out"));
    String pair_in(getStringOption_("pair_in"));
    String feature_out(getStringOption_("feature_out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));
    double RT_tolerance(getDoubleOption_("RT_tolerance"));
    double expansion_range(getDoubleOption_("expansion_range"));
    Size max_isotope(getIntOption_("max_isotope"));
    Int debug(getIntOption_("debug"));

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    PeakMap exp;
    MzMLFile().load(in, exp);
    exp.sortSpectra();
    exp.updateRanges();

    // read pair file
    ifstream is(pair_in.c_str());
    String line;
    vector<SILAC_pair> pairs;
    while (getline(is, line))
    {
      line.trim();
      if (line.empty() || line[0] == '#')
      {
        continue;
      }
      vector<String> split;
      line.split(' ', split);
      if (split.size() != 4)
      {
        cerr << "missformated line ('" << line << "') should be (space separated) 'm/z-light m/z-heavy charge rt'" << endl;
        // a malformed line must not be parsed: indexing split[0..3] below would be out of range
        continue;
      }
      SILAC_pair p;
      p.mz_light = split[0].toDouble();
      p.mz_heavy = split[1].toDouble();
      p.charge = split[2].toInt();
      p.rt = split[3].toDouble();
      pairs.push_back(p);
    }
    is.close();

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------


    // both channels stem from the same input file
    ConsensusMap results_map;
    results_map.getFileDescriptions()[0].label = "light";
    results_map.getFileDescriptions()[0].filename = in;
    results_map.getFileDescriptions()[1].label = "heavy";
    results_map.getFileDescriptions()[1].filename = in;

    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", 3);
    ff_param.setValue("intensity_threshold", -1.0);
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    vector<SILACQuantitation> quantlets;
    FeatureMap all_features;
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      // only non-empty MS1 zoom scans are used
      if (it->size() == 0 || it->getMSLevel() != 1 || !it->getInstrumentSettings().getZoomScan())
      {
        continue;
      }

      PeakSpectrum new_spec = *it;

      // get spacing from data (smallest m/z distance between consecutive peaks)
      double min_spacing(numeric_limits<double>::max());
      double last_mz(0);
      for (PeakSpectrum::ConstIterator pit = new_spec.begin(); pit != new_spec.end(); ++pit)
      {
        if (pit->getMZ() - last_mz < min_spacing)
        {
          min_spacing = pit->getMZ() - last_mz;
        }
        last_mz = pit->getMZ();
      }
      writeDebug_("Min-spacing=" + String(min_spacing), 1);

      // padding steps by min_spacing; a non-positive step would never reach the border
      const bool can_expand = min_spacing > 0;

      // split the spectrum into two subspectra, by using different hypothesis of
      // the SILAC pairs
      Size idx = 0;
      for (vector<SILAC_pair>::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit, ++idx)
      {
        // in RT window?
        if (fabs(it->getRT() - pit->rt) >= RT_tolerance)
        {
          continue;
        }

        // now excise the two ranges for the pair, complete isotope distributions of both, light and heavy
        PeakSpectrum light_spec, heavy_spec;
        light_spec.setRT(it->getRT());
        heavy_spec.setRT(it->getRT());
        for (PeakSpectrum::ConstIterator sit = it->begin(); sit != it->end(); ++sit)
        {
          double mz(sit->getMZ());
          if (mz - (pit->mz_light - precursor_mass_tolerance) > 0 &&
              (pit->mz_light + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz  > 0)
          {
            light_spec.push_back(*sit);
          }

          if (mz - (pit->mz_heavy - precursor_mass_tolerance) > 0 &&
              (pit->mz_heavy + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz  > 0)
          {
            heavy_spec.push_back(*sit);
          }
        }

        // zero-intensity peak used for padding both sub-spectra
        Peak1D p;
        p.setIntensity(0);

        // expand light spectrum by expansion_range on both sides
        if (can_expand && light_spec.size() > 0)
        {
          // downwards: start one spacing below the first peak so no m/z position is duplicated
          const double lower_border = light_spec.begin()->getMZ() - expansion_range;
          for (double pos = light_spec.begin()->getMZ() - min_spacing; pos > lower_border; pos -= min_spacing)
          {
            p.setMZ(pos);
            light_spec.insert(light_spec.begin(), p);
          }

          // upwards: the border is relative to the LAST peak
          const double upper_border = light_spec.rbegin()->getMZ() + expansion_range;
          for (double pos = light_spec.rbegin()->getMZ() + min_spacing; pos < upper_border; pos += min_spacing)
          {
            p.setMZ(pos);
            light_spec.push_back(p);
          }
        }

        // expand heavy spectrum in the same way
        if (can_expand && heavy_spec.size() > 0)
        {
          const double lower_border = heavy_spec.begin()->getMZ() - expansion_range;
          for (double pos = heavy_spec.begin()->getMZ() - min_spacing; pos > lower_border; pos -= min_spacing)
          {
            p.setMZ(pos);
            heavy_spec.insert(heavy_spec.begin(), p);
          }

          const double upper_border = heavy_spec.rbegin()->getMZ() + expansion_range;
          for (double pos = heavy_spec.rbegin()->getMZ() + min_spacing; pos < upper_border; pos += min_spacing)
          {
            p.setMZ(pos);
            heavy_spec.push_back(p);
          }
        }

        // create experiments for feature finding
        PeakMap new_exp_light, new_exp_heavy;
        new_exp_light.addSpectrum(light_spec);
        new_exp_heavy.addSpectrum(heavy_spec);

        if (debug > 9)
        {
          MzMLFile().store(String(it->getRT()) + "_debugging_light.mzML", new_exp_light);
          MzMLFile().store(String(it->getRT()) + "_debugging_heavy.mzML", new_exp_heavy);
        }

        writeDebug_("Spectrum-id: " + it->getNativeID() + " @ " + String(it->getRT()) + "s", 1);

        new_exp_light.updateRanges();
        new_exp_heavy.updateRanges();

        FeatureMap feature_map_light, feature_map_heavy, seeds;
        if (light_spec.size() > 0)
        {
          ff.run("isotope_wavelet", new_exp_light, feature_map_light, ff_param, seeds);
        }
        writeDebug_("#light_features=" + String(feature_map_light.size()), 1);
        if (heavy_spec.size() > 0)
        {
          ff.run("isotope_wavelet", new_exp_heavy, feature_map_heavy, ff_param, seeds);
        }
        writeDebug_("#heavy_features=" + String(feature_map_heavy.size()), 1);

        // remember all features and tag them with the index of the pair they were searched for
        vector<MatchedFeature> light, heavy;
        for (FeatureMap::const_iterator fit = feature_map_light.begin(); fit != feature_map_light.end(); ++fit)
        {
          all_features.push_back(*fit);
          light.push_back(MatchedFeature(*fit, idx));
        }
        for (FeatureMap::const_iterator fit = feature_map_heavy.begin(); fit != feature_map_heavy.end(); ++fit)
        {
          all_features.push_back(*fit);
          heavy.push_back(MatchedFeature(*fit, idx));
        }

        if (!heavy.empty() && !light.empty())
        {
          writeDebug_("Finding best feature pair out of " + String(light.size()) + " light and " + String(heavy.size()) + " heavy matching features.", 1);
          // now find "good" matches, means the pair with the smallest m/z deviation
          Feature best_light, best_heavy;
          double best_deviation(numeric_limits<double>::max());
          Size best_idx(pairs.size()); // pairs.size() marks "no match found"
          for (vector<MatchedFeature>::const_iterator fit1 = light.begin(); fit1 != light.end(); ++fit1)
          {
            for (vector<MatchedFeature>::const_iterator fit2 = heavy.begin(); fit2 != heavy.end(); ++fit2)
            {
              if (fit1->idx != fit2->idx || fit1->f.getCharge() != fit2->f.getCharge() ||
                  fabs(fit1->f.getMZ() - pairs[fit1->idx].mz_light) > precursor_mass_tolerance ||
                  fabs(fit2->f.getMZ() - pairs[fit2->idx].mz_heavy) > precursor_mass_tolerance)
              {
                continue;
              }
              const double deviation = fabs((fit1->f.getMZ() - pairs[fit1->idx].mz_light) - (fit2->f.getMZ() - pairs[fit2->idx].mz_heavy));
              if (deviation < best_deviation && deviation < precursor_mass_tolerance)
              {
                // remember the deviation, otherwise any later acceptable pair would replace a better one
                best_deviation = deviation;
                best_light = fit1->f;
                best_heavy = fit2->f;
                best_idx = fit1->idx;
              }
            }
          }

          if (best_idx == pairs.size())
          {
            continue;
          }

          writeDebug_("Ratio: " + String(best_heavy.getIntensity() / best_light.getIntensity()), 1);
          ConsensusFeature SILAC_feature;
          SILAC_feature.setMZ((best_light.getMZ() + best_heavy.getMZ()) / 2.0);
          SILAC_feature.setRT((best_light.getRT() + best_heavy.getRT()) / 2.0);
          SILAC_feature.insert(0, best_light);
          SILAC_feature.insert(1, best_heavy);
          results_map.push_back(SILAC_feature);
          quantlets.push_back(SILACQuantitation(best_light.getIntensity(), best_heavy.getIntensity(), best_idx));
        }
      }
    }

    // now calculate the final quantitation values from the quantlets
    Map<Size, vector<SILACQuantitation> > idx_to_quantlet;
    for (vector<SILACQuantitation>::const_iterator it = quantlets.begin(); it != quantlets.end(); ++it)
    {
      idx_to_quantlet[it->idx].push_back(*it);
    }

    for (Map<Size, vector<SILACQuantitation> >::ConstIterator it1 = idx_to_quantlet.begin(); it1 != idx_to_quantlet.end(); ++it1)
    {
      SILAC_pair silac_pair = pairs[it1->first];

      // simply add up all intensities and calculate the final ratio
      double light_sum(0), heavy_sum(0);
      vector<double> light_ints, heavy_ints, ratios;
      for (vector<SILACQuantitation>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
      {
        light_sum += it2->light_intensity;
        light_ints.push_back(it2->light_intensity);
        heavy_sum += it2->heavy_intensity;
        heavy_ints.push_back(it2->heavy_intensity);
        // per-scan ratio, weighted by the total intensity of the scan
        ratios.push_back(it2->heavy_intensity / it2->light_intensity * (it2->heavy_intensity + it2->light_intensity));
      }

      // absolute deviation over ALL weighted ratios, normalized by the summed weights
      double absdev_ratios = Math::absdev(ratios.begin(), ratios.end()) / (heavy_sum + light_sum);
      cout << "Ratio: " << silac_pair.mz_light << " <-> " << silac_pair.mz_heavy << " @ " << silac_pair.rt << " s, ratio(h/l) " << heavy_sum / light_sum << " +/- " << absdev_ratios << " (#scans for quantation: " << String(it1->second.size()) << " )" << endl;
    }


    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    if (feature_out != "")
    {
      FeatureXMLFile().store(feature_out, all_features);
    }
    writeDebug_("Writing output", 1);
    ConsensusXMLFile().store(out, results_map);

    return EXECUTION_OK;
  }
Esempio n. 25
0
	e_ptr->filterSpectrum(spec);
	TEST_EQUAL(spec.size(), 121)

	p.setValue("threshold", 10.0);
	e_ptr->setParameters(p);

	e_ptr->filterSpectrum(spec);
	TEST_EQUAL(spec.size(), 14)
END_SECTION

// Checks filterPeakMap on a PeakMap holding a single spectrum loaded from a DTA test file:
// with threshold 1.0 all 121 peaks are expected to remain, with threshold 10.0 only 14.
START_SECTION((void filterPeakMap(PeakMap& exp)))
	DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	PeakMap pm;
	pm.addSpectrum(spec);

  // unfiltered reference size
  TEST_EQUAL(pm.begin()->size(), 121)

	Param p(e_ptr->getParameters());
	p.setValue("threshold", 1.0);
	e_ptr->setParameters(p);

  e_ptr->filterPeakMap(pm);
  TEST_EQUAL(pm.begin()->size(), 121)

  // raise the threshold and filter the same map again
  p.setValue("threshold", 10.0);
	e_ptr->setParameters(p);
  e_ptr->filterPeakMap(pm);
  TEST_EQUAL(pm.begin()->size(), 14)
Esempio n. 26
0
  /// Tool entry point: loads the MS1 spectra of the input mzML file, runs the isotope wavelet
  /// feature finder on them and stores the detected features in the output file.
  /// Returns EXECUTION_OK after the features have been written.
  ExitCodes main_(int, const char **)
  {
    // file names taken from the tool parameters
    const String input_path = getStringOption_("in");
    const String output_path = getStringOption_("out");

    // restrict loading to MS level 1, i.e. fragment spectra are not read
    PeakFileOptions ms1_only;
    ms1_only.setMSLevels(vector<Int>(1, 1));

    MzMLFile mzml;
    mzml.getOptions() = ms1_only;
    mzml.setLogType(log_type_);

    PeakMap exp;
    mzml.load(input_path, exp);
    exp.updateRanges();

    // seeds are not supported by this tool, so an empty map is passed to the finder
    FeatureMap<> seeds;
    // receives the detected features
    FeatureMap<> features;

    FeatureFinder finder;
    finder.setLogType(log_type_);

    // parameters of the "algorithm:" section are forwarded to the feature finder
    Param finder_param = getParam_().copy("algorithm:", true);
    writeDebug_("Parameters passed to FeatureFinder", finder_param, 3);

    finder.run(FeatureFinderAlgorithmIsotopeWavelet<Peak1D, Feature>::getProductName(), exp, features, finder_param, seeds);
    features.applyMemberFunction(&UniqueIdInterface::setUniqueId);

    // at a high debug level, dump the meta values of every feature that has any
    if (debug_level_ > 10)
    {
      for (FeatureMap<>::Iterator feat = features.begin(); feat != features.end(); ++feat)
      {
        if (feat->isMetaEmpty())
        {
          continue;
        }

        vector<String> keys;
        feat->getKeys(keys);
        LOG_INFO << "Feature " << feat->getUniqueId() << endl;
        for (vector<String>::const_iterator key = keys.begin(); key != keys.end(); ++key)
        {
          LOG_INFO << "  " << *key << " = " << feat->getMetaValue(*key) << endl;
        }
      }
    }

    //-------------------------------------------------------------
    // writing files
    //-------------------------------------------------------------

    // record this processing step in the output
    addDataProcessing_(features, getProcessingInfo_(DataProcessing::QUANTITATION));

    FeatureXMLFile feature_file;
    feature_file.store(output_path, features);

    return EXECUTION_OK;
  }
Esempio n. 27
0
  ptr = new MascotGenericFile();
  TEST_NOT_EQUAL(ptr, nullPointer)
}
END_SECTION

// Destructor test: deleting the instance held by ptr must not fail.
START_SECTION(virtual ~MascotGenericFile())
{
  delete ptr;
}
END_SECTION

// fresh instance used by the sections below (the previous one was deleted above)
ptr = new MascotGenericFile();

// Checks load(): the test input is expected to yield exactly one spectrum with nine peaks.
START_SECTION((template < typename MapType > void load(const String &filename, MapType &exp)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);
  TEST_EQUAL(exp.size(), 1)

  TEST_EQUAL(exp.begin()->size(), 9)
}
END_SECTION

// Start of the store() test: loads the reference input and configures the
// fixed modifications on a copy of the current parameters.
START_SECTION((void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact = false)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);
  
  // handling of modifications:
  Param params = ptr->getParameters();
  params.setValue("fixed_modifications", ListUtils::create<String>("Carbamidomethyl (C),Phospho (S)"));
Esempio n. 28
0
  /// Tool entry point: smooths the spectra/chromatograms of the input mzML file with a
  /// Savitzky-Golay filter and stores the result.
  ///
  /// With processOption "lowmemory" the work is delegated to doLowMemAlgorithm(). Otherwise the
  /// whole file is loaded, checked (non-empty, sorted) and filtered in memory.
  /// Returns INCOMPATIBLE_INPUT_DATA if the input is empty or unsorted, EXECUTION_OK otherwise
  /// (or whatever doLowMemAlgorithm() returns in low memory mode).
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    in = getStringOption_("in");
    out = getStringOption_("out");
    String process_option = getStringOption_("processOption");

    Param filter_param = getParam_().copy("algorithm:", true);
    writeDebug_("Parameters passed to filter", filter_param, 3);

    SavitzkyGolayFilter smoother;
    smoother.setLogType(log_type_);
    smoother.setParameters(filter_param);

    // low memory mode is handled completely by a separate routine
    if (process_option == "lowmemory")
    {
      return doLowMemAlgorithm(smoother);
    }

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    MzMLFile mzml;
    mzml.setLogType(log_type_);
    PeakMap exp;
    mzml.load(in, exp);

    // nothing to do if the file holds neither spectra nor chromatograms
    const bool no_spectra = exp.empty();
    const bool no_chromatograms = exp.getChromatograms().empty();
    if (no_spectra && no_chromatograms)
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.";
      return INCOMPATIBLE_INPUT_DATA;
    }

    // profile data is expected; the first spectrum is used for the estimate
    if (!exp.empty())
    {
      if (PeakTypeEstimator().estimateType(exp[0].begin(), exp[0].end()) == SpectrumSettings::PEAKS)
      {
        writeLog_("Warning: OpenMS peak type estimation indicates that this is not profile data!");
      }
    }

    // every spectrum has to be sorted
    for (PeakMap::Iterator spectrum = exp.begin(); spectrum != exp.end(); ++spectrum)
    {
      if (spectrum->isSorted())
      {
        continue;
      }
      writeLog_("Error: Not all spectra are sorted according to peak m/z positions. Use FileFilter to sort the input!");
      return INCOMPATIBLE_INPUT_DATA;
    }

    // every chromatogram has to be sorted as well
    for (Size chrom_idx = 0; chrom_idx < exp.getChromatograms().size(); ++chrom_idx)
    {
      if (exp.getChromatogram(chrom_idx).isSorted())
      {
        continue;
      }
      writeLog_("Error: Not all chromatograms are sorted according to peak m/z positions. Use FileFilter to sort the input!");
      return INCOMPATIBLE_INPUT_DATA;
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    smoother.filterExperiment(exp);

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // record this processing step in the output
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::SMOOTHING));

    mzml.store(out, exp);

    return EXECUTION_OK;
  }
Esempio n. 29
0
	DTAFile dta_file;
	PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);
	
	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5)

	e_ptr->filterSpectrum(spec);
	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION

// Checks filterPeakMap on a PeakMap holding one spectrum from a DTA test file:
// the intensity of the peak at offset 40 is expected to change from 37.5 to sqrt(37.5).
START_SECTION((void filterPeakMap(PeakMap& exp)))
	DTAFile dta_file;
  PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	PeakMap pm;
	pm.addSpectrum(spec);

	// intensity before filtering
	TEST_REAL_SIMILAR((pm.begin()->begin() + 40)->getIntensity(), 37.5)

	e_ptr->filterPeakMap(pm);
	TEST_REAL_SIMILAR((pm.begin()->begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION

// Start of the filterPeakSpectrum test: loads the reference spectrum and
// checks the unfiltered intensity of the peak at offset 40.
START_SECTION((void filterPeakSpectrum(PeakSpectrum& spectrum)))
	DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5)
Esempio n. 30
0
// Destructor test: deleting the instance held by ptr must not fail.
START_SECTION(~CachedmzML())
{
  delete ptr;
}
END_SECTION

// see also MSDataCachedConsumer_test.cpp -> consumeSpectrum
// this is a complete test of the caching object
START_SECTION(( [EXTRA] testCaching))
{
  std::string tmp_filename;
  NEW_TMP_FILE(tmp_filename);

  // Load experiment
  PeakMap exp;
  MzMLFile().load(OPENMS_GET_TEST_DATA_PATH("MzMLFile_1.mzML"), exp);
  TEST_EQUAL(exp.getNrSpectra() > 0, true)
  TEST_EQUAL(exp.getNrChromatograms() > 0, true)

  // Cache the experiment to a temporary file
  CachedmzML cache;
  cache.writeMemdump(exp, tmp_filename);

  // Check whether spectra were written to disk correctly...
  {
    // Create the index from the given file
    cache.createMemdumpIndex(tmp_filename);
    std::vector<std::streampos> spectra_index = cache.getSpectraIndex();
    TEST_EQUAL(spectra_index.size(), 4)
    std::ifstream ifs_(tmp_filename.c_str(), std::ios::binary);