Exemplo n.º 1
0
 void ParentPeakMower::filterPeakMap(PeakMap & exp)
 {
   for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
   {
     filterSpectrum(*it);
   }
 }
Exemplo n.º 2
0
 void Normalizer::filterPeakMap(PeakMap & exp)
 {
   for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
   {
     filterSpectrum(*it);
   }
 }
Exemplo n.º 3
0
void CompNovoIdentificationCID::getIdentifications(vector<PeptideIdentification> & pep_ids, const PeakMap & exp)
{
    Size count(1);
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it, ++count)
    {
        //cerr << count << "/" << exp.size() << endl;
        PeptideIdentification id;
        // TODO check if both CID and ETD is present;
        PeakSpectrum CID_spec(*it);
        id.setRT(it->getRT());
        id.setMZ(it->getPrecursors().begin()->getMZ());

        subspec_to_sequences_.clear();
        permute_cache_.clear();
        decomp_cache_.clear();

        getIdentification(id, CID_spec);
        //cerr << "size_of id=" << id.getHits().size() << endl;
        pep_ids.push_back(id);

        //++it;

        //
        //if (count == 10)
        //{
        //return;
        //}
    }
    return;
}
 // lists of peptide hits in "maps" will be sorted
 bool MapAlignmentAlgorithmIdentification::getRetentionTimes_(
   PeakMap& experiment, SeqToList& rt_data)
 {
   for (PeakMap::Iterator exp_it = experiment.begin();
        exp_it != experiment.end(); ++exp_it)
   {
     getRetentionTimes_(exp_it->getPeptideIdentifications(), rt_data);
   }
   // duplicate annotations should not be possible -> no need to remove them
   return false;
 }
Exemplo n.º 5
0
 void WindowMower::filterPeakMap(PeakMap & exp)
 {
   bool sliding = (String)param_.getValue("movetype") == "slide" ? true : false;
   for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
   {
     if (sliding)
     {
       filterPeakSpectrumForTopNInSlidingWindow(*it);
     } else
     {
       filterPeakSpectrumForTopNInJumpingWindow(*it);
     }
   }
 }
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    String in_spectra = getStringOption_("in_spectra");
    String in_identifications = getStringOption_("in_identifications");
    String outfile = getStringOption_("model_output_file");
    Int precursor_charge = getIntOption_("precursor_charge");

    //-------------------------------------------------------------
    // init SvmTheoreticalSpectrumGeneratorTrainer
    //-------------------------------------------------------------
    SvmTheoreticalSpectrumGeneratorTrainer trainer;

    Param param = getParam_().copy("algorithm:", true);
    String write_files = getFlag_("write_training_files") ? "true" : "false";
    param.setValue("write_training_files", write_files);
    trainer.setParameters(param);

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    PeakMap map;
    MzMLFile().load(in_spectra, map);

    std::vector<PeptideIdentification> pep_ids;
    std::vector<ProteinIdentification> prot_ids;
    String tmp_str;
    IdXMLFile().load(in_identifications, prot_ids, pep_ids, tmp_str);

    IDMapper idmapper;
    Param par;
    par.setValue("rt_tolerance", 0.001);
    par.setValue("mz_tolerance", 0.001);
    idmapper.setParameters(par);
    idmapper.annotate(map, pep_ids, prot_ids);

    //generate vector of annotations
    std::vector<AASequence> annotations;
    PeakMap::iterator it;
    for (it = map.begin(); it != map.end(); ++it)
    {
      annotations.push_back(it->getPeptideIdentifications()[0].getHits()[0].getSequence());
    }

    trainer.trainModel(map, annotations, outfile, precursor_charge);
    return EXECUTION_OK;
  }
Exemplo n.º 7
0
	p.setValue("threshold", 10.0);
	e_ptr->setParameters(p);

	e_ptr->filterSpectrum(spec);
	TEST_EQUAL(spec.size(), 14)
END_SECTION

START_SECTION((void filterPeakMap(PeakMap& exp)))
	DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	PeakMap pm;
	pm.addSpectrum(spec);

  TEST_EQUAL(pm.begin()->size(), 121)

	Param p(e_ptr->getParameters());
	p.setValue("threshold", 1.0);
	e_ptr->setParameters(p);

  e_ptr->filterPeakMap(pm);
  TEST_EQUAL(pm.begin()->size(), 121)

  p.setValue("threshold", 10.0);
	e_ptr->setParameters(p);
  e_ptr->filterPeakMap(pm);
  TEST_EQUAL(pm.begin()->size(), 14)

END_SECTION
  ExitCodes main_(int, const char**)
  {
    // parsing parameters
    String in(getStringOption_("in"));
    String feature_in(getStringOption_("feature_in"));
    String out(getStringOption_("out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

    // reading input
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    PeakMap exp;
    fh.loadExperiment(in, exp, in_type, log_type_, false, false);
    exp.sortSpectra();

    FeatureMap feature_map;
    if (feature_in != "")
    {
      FeatureXMLFile().load(feature_in, feature_map);
    }

    // calculations
    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", getIntOption_("max_charge"));
    ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    PeakMap exp2 = exp;
    exp2.clear(false);
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->size() != 0)
      {
        exp2.addSpectrum(*it);
      }
    }

    exp = exp2;
    exp.updateRanges();

    // TODO check MS2 and MS1 counts
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);
    progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      progresslogger.setProgress(exp.end() - it);
      if (it->getMSLevel() != 2)
      {
        continue;
      }
      // find first MS1 scan of the MS/MS scan
      PeakMap::Iterator ms1_it = it;
      while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
      {
        --ms1_it;
      }
      if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
      {
        writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
        continue;
      }
      if (ms1_it->size() == 0)
      {
        writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
        continue;
      }

      PeakMap::Iterator ms2_it = ms1_it;
      ++ms2_it;

      while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
      {
        // first: error checks
        if (ms2_it->getPrecursors().empty())
        {
          writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
          ++ms2_it;
          continue;
        }
        else if (ms2_it->getPrecursors().size() > 1)
        {
          writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
        }

        Precursor prec = *ms2_it->getPrecursors().begin();
        double prec_pos = prec.getMZ();

        PeakMap new_exp;
        // now excise small region from the MS1 spec for the feature finder (isotope pattern must be covered...)
        PeakSpectrum zoom_spec;
        for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
        {
          if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3)
          {
            zoom_spec.push_back(*pit);
          }
        }
        new_exp.addSpectrum(zoom_spec);
        new_exp.updateRanges();
        FeatureMap features, seeds;
        ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
        if (features.empty())
        {
          writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
          ++ms2_it;
          continue;
        }

        double max_int(numeric_limits<double>::min());
        double min_dist(numeric_limits<double>::max());
        Size max_int_feat_idx(0);

        for (Size i = 0; i != features.size(); ++i)
        {
          if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance &&
              features[i].getIntensity() > max_int)
          {
            max_int_feat_idx = i;
            max_int = features[i].getIntensity();
            min_dist = fabs(features[i].getMZ() - prec_pos);
          }
        }


        writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
        if (min_dist < precursor_mass_tolerance)
        {
          prec.setMZ(features[max_int_feat_idx].getMZ());
          prec.setCharge(features[max_int_feat_idx].getCharge());
          vector<Precursor> precs;
          precs.push_back(prec);
          ms2_it->setPrecursors(precs);
          writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
        }

        ++ms2_it;
      }
      it = --ms2_it;
    }
    progresslogger.endProgress();

    // writing output
    fh.storeExperiment(out, exp, log_type_);

    return EXECUTION_OK;
  }
    set<Size> correctToNearestMS1Peak(PeakMap & exp, double mz_tolerance, bool ppm, vector<double> & deltaMZs, vector<double> & mzs, vector<double> & rts)
    {
      set<Size> corrected_precursors;
      // load experiment and extract precursors
      vector<Precursor> precursors;  // precursor
      vector<double> precursors_rt;  // RT of precursor MS2 spectrum
      vector<Size> precursor_scan_index;
      getPrecursors_(exp, precursors, precursors_rt, precursor_scan_index);

      for (Size i = 0; i != precursors_rt.size(); ++i)
      {
        // get precursor rt
        double rt = precursors_rt[i];

        // get precursor MZ
        double mz = precursors[i].getMZ();

        //cout << rt << " " << mz << endl;

        // get precursor spectrum
        MSExperiment<Peak1D>::ConstIterator rt_it = exp.RTBegin(rt - 1e-8);

        // store index of MS2 spectrum
        UInt precursor_spectrum_idx = rt_it - exp.begin();

        // get parent (MS1) of precursor spectrum
        rt_it = exp.getPrecursorSpectrum(rt_it);

        if (rt_it->getMSLevel() != 1)
        {
          LOG_WARN << "Error: no MS1 spectrum for this precursor" << endl;
        }

        //cout << rt_it->getRT() << " " << rt_it->size() << endl;

        // find peak (index) closest to expected position
        Size nearest_peak_idx = rt_it->findNearest(mz);

        // get actual position of closest peak
        double nearest_peak_mz = (*rt_it)[nearest_peak_idx].getMZ();

        // calculate error between expected and actual position
        double nearestPeakError = ppm ? abs(nearest_peak_mz - mz)/mz * 1e6 : abs(nearest_peak_mz - mz);

        // check if error is small enough
        if (nearestPeakError < mz_tolerance)
        {
          // sanity check: do we really have the same precursor in the original and the picked spectrum
          if (fabs(exp[precursor_spectrum_idx].getPrecursors()[0].getMZ() - mz) > 0.0001)
          {
            LOG_WARN << "Error: index is referencing different precursors in original and picked spectrum." << endl;
          }

          // cout << mz << " -> " << nearest_peak_mz << endl;
          double deltaMZ = nearest_peak_mz - mz;
          deltaMZs.push_back(deltaMZ);
          mzs.push_back(mz);
          rts.push_back(rt);
          // correct entries
          Precursor corrected_prec = precursors[i];
          corrected_prec.setMZ(nearest_peak_mz);
          exp[precursor_spectrum_idx].getPrecursors()[0] = corrected_prec;
          corrected_precursors.insert(precursor_spectrum_idx);
        }
      }
      return corrected_precursors;
    }
Exemplo n.º 10
0
  TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()),"dta2d");

  TEST_EQUAL(e.size(), 9);
  ABORT_IF(e.size() != 9)

  TEST_STRING_EQUAL(e[0].getNativeID(),"index=0")
  TEST_STRING_EQUAL(e[1].getNativeID(),"index=1")
  TEST_STRING_EQUAL(e[2].getNativeID(),"index=2")
  TEST_STRING_EQUAL(e[3].getNativeID(),"index=3")
  TEST_STRING_EQUAL(e[4].getNativeID(),"index=4")
  TEST_STRING_EQUAL(e[5].getNativeID(),"index=5")
  TEST_STRING_EQUAL(e[6].getNativeID(),"index=6")
  TEST_STRING_EQUAL(e[7].getNativeID(),"index=7")
  TEST_STRING_EQUAL(e[8].getNativeID(),"index=8")

  PeakMap::const_iterator it(e.begin());
  TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 230.02)
  TEST_REAL_SIMILAR(it->getRT(), 4711.1)
  TEST_REAL_SIMILAR((*it)[0].getIntensity(), 47218.89)
  ++it;

  TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 231.51)
  TEST_REAL_SIMILAR(it->getRT(), 4711.2)
  TEST_REAL_SIMILAR((*it)[0].getIntensity(), 89935.22)
  ++it;

  TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 139.42)
  TEST_REAL_SIMILAR(it->getRT(), 4711.3)
  TEST_REAL_SIMILAR((*it)[0].getIntensity(), 318.52)
  ++it;
Exemplo n.º 11
0
  void MassTraceDetection::run(const PeakMap& input_exp, std::vector<MassTrace>& found_masstraces)
  {
    // make sure the output vector is empty
    found_masstraces.clear();

    // gather all peaks that are potential chromatographic peak apices
    //   - use work_exp for actual work (remove peaks below noise threshold)
    //   - store potential apices in chrom_apices
    PeakMap work_exp;
    MapIdxSortedByInt chrom_apices;

    Size total_peak_count(0);
    std::vector<Size> spec_offsets;
    spec_offsets.push_back(0);

    Size spectra_count(0);

    // *********************************************************** //
    //  Step 1: Detecting potential chromatographic apices
    // *********************************************************** //
    for (PeakMap::ConstIterator it = input_exp.begin(); it != input_exp.end(); ++it)
    {
      // check if this is a MS1 survey scan
      if (it->getMSLevel() != 1) continue;

      std::vector<Size> indices_passing;
      for (Size peak_idx = 0; peak_idx < it->size(); ++peak_idx)
      {
        double tmp_peak_int((*it)[peak_idx].getIntensity());
        if (tmp_peak_int > noise_threshold_int_)
        {
          // Assume that noise_threshold_int_ contains the noise level of the
          // data and we want to be chrom_peak_snr times above the noise level
          // --> add this peak as possible chromatographic apex
          if (tmp_peak_int > chrom_peak_snr_ * noise_threshold_int_)
          {
            chrom_apices.insert(std::make_pair(tmp_peak_int, std::make_pair(spectra_count, indices_passing.size())));
          }
          indices_passing.push_back(peak_idx);
          ++total_peak_count;
        }
      }
      PeakMap::SpectrumType tmp_spec(*it);
      tmp_spec.select(indices_passing);
      work_exp.addSpectrum(tmp_spec);
      spec_offsets.push_back(spec_offsets.back() + tmp_spec.size());
      ++spectra_count;
    }

    if (spectra_count < 3)
    {
      throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                    "Input map consists of too few MS1 spectra (less than 3!). Aborting...", String(spectra_count));
    }

    // discard last spectrum's offset
    spec_offsets.pop_back();

    // *********************************************************************
    // Step 2: start extending mass traces beginning with the apex peak (go
    // through all peaks in order of decreasing intensity)
    // *********************************************************************
    run_(chrom_apices, total_peak_count, work_exp, spec_offsets, found_masstraces);

    return;
  } // end of MassTraceDetection::run
Exemplo n.º 12
0
  ExitCodes main_(int, const char **) override
  {

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    String in = getStringOption_("in");
    String out = getStringOption_("out");

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------

    PeakMap exp;
    MzMLFile f;
    f.load(in, exp);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    //determine maximum peak
    exp.updateRanges();
    double max = exp.getMaxInt() / 100.0;

    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->getMSLevel() < 2)
      {
        for (PeakMap::SpectrumType::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
        {
          it2->setIntensity(it2->getIntensity() / max);
        }
      }
    }


    /// @todo add chromatogram support for normalization, e.g. for MRM stuff (Andreas)
    /*
      vector<MSChromatogram > chroms = exp.getChromatograms();
      double sum(0);
for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
{
  for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
  {
              sum += it2->getIntensity();
          }
      }

      for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
      {
          for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
          {
              it2->setIntensity(it2->getIntensity() / sum * 1000000.0);
          }
      }

      exp.setChromatograms(chroms);
    */

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    //annotate output with data processing info
    addDataProcessing_(exp, getProcessingInfo_(DataProcessing::NORMALIZATION));

    f.store(out, exp);

    return EXECUTION_OK;
  }
  ExitCodes main_(int argc, const char** argv)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input/output files
    String in(getStringOption_("in")), out(getStringOption_("out"));
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------

    PeakMap exp;
    // keep only MS2 spectra
    fh.getOptions().addMSLevel(2);
    fh.loadExperiment(in, exp, in_type, log_type_);
    writeDebug_(String("Spectra loaded: ") + exp.size(), 2);

    if (exp.getSpectra().empty())
    {
      throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file.");
    }

    // determine type of spectral data (profile or centroided)
    SpectrumSettings::SpectrumType spectrum_type = exp[0].getType();

    if (spectrum_type == SpectrumSettings::RAWDATA)
    {
      if (!getFlag_("force"))
      {
        throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag.");
      }
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    Param mascot_param = getParam_().copy("Mascot_parameters:", true);
    MascotGenericFile mgf_file;
    Param p;
    // TODO: switch this to mzML (much smaller)
    p.setValue("internal:format", "Mascot generic", "Sets the format type of the peak list, this should not be changed unless you write the header only.", ListUtils::create<String>("advanced"));
    p.setValue("internal:HTTP_format", "true", "Write header with MIME boundaries instead of simple key-value pairs. For HTTP submission only.", ListUtils::create<String>("advanced"));
    p.setValue("internal:content", "all", "Use parameter header + the peak lists with BEGIN IONS... or only one of them.", ListUtils::create<String>("advanced"));
    mgf_file.setParameters(mascot_param);

    // get the spectra into string stream
    writeDebug_("Writing MGF file to stream", 1);
    stringstream ss;
    mgf_file.store(ss, in, exp, true); // write in compact format

    // Usage of a QCoreApplication is overkill here (and ugly too), but we just use the
    // QEventLoop to process the signals and slots and grab the results afterwards from
    // the MascotRemotQuery instance
    char** argv2 = const_cast<char**>(argv);
    QCoreApplication event_loop(argc, argv2);
    MascotRemoteQuery* mascot_query = new MascotRemoteQuery(&event_loop);
    Param mascot_query_param = getParam_().copy("Mascot_server:", true);
    writeDebug_("Setting parameters for Mascot query", 1);
    mascot_query->setParameters(mascot_query_param);
    writeDebug_("Setting spectra for Mascot query", 1);
    mascot_query->setQuerySpectra(ss.str());

    // remove unnecessary spectra
    ss.clear();

    QObject::connect(mascot_query, SIGNAL(done()), &event_loop, SLOT(quit()));
    QTimer::singleShot(1000, mascot_query, SLOT(run()));
    writeDebug_("Fire off Mascot query", 1);
    event_loop.exec();
    writeDebug_("Mascot query finished", 1);

    if (mascot_query->hasError())
    {
      writeLog_("An error occurred during the query: " + mascot_query->getErrorMessage());
      delete mascot_query;
      return EXTERNAL_PROGRAM_ERROR;
    }

    // write Mascot response to file
    String mascot_tmp_file_name(File::getTempDirectory() + "/" + File::getUniqueName() + "_Mascot_response");
    QFile mascot_tmp_file(mascot_tmp_file_name.c_str());
    mascot_tmp_file.open(QIODevice::WriteOnly);
    mascot_tmp_file.write(mascot_query->getMascotXMLResponse());
    mascot_tmp_file.close();

    // clean up
    delete mascot_query;

    vector<PeptideIdentification> pep_ids;
    ProteinIdentification prot_id;

    // set up mapping between scan numbers and retention times:
    MascotXMLFile::RTMapping rt_mapping;
    MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);

    // read the response
    MascotXMLFile().load(mascot_tmp_file_name, prot_id, pep_ids, rt_mapping);
    writeDebug_("Read " + String(pep_ids.size()) + " peptide ids and " + String(prot_id.getHits().size()) + " protein identifications from Mascot", 5);

    // for debugging errors relating to unexpected response files
    if (this->debug_level_ >= 100)
    {
      writeDebug_(String("\nMascot Server Response file saved to: '") + mascot_tmp_file_name + "'. If an error occurs, send this file to the OpenMS team.\n", 100);
    }
    else
    {
      // delete file
      mascot_tmp_file.remove();
    }

    // keep or delete protein identifications?!
    vector<ProteinIdentification> prot_ids;
    if (!getFlag_("keep_protein_links"))
    {
      // remove protein links from peptides
      for (Size i = 0; i < pep_ids.size(); ++i)
      {
        std::vector<PeptideHit> hits = pep_ids[i].getHits();
        for (Size h = 0; h < hits.size(); ++h)
        {
          hits[h].setPeptideEvidences(vector<PeptideEvidence>());
        }
        pep_ids[i].setHits(hits);
      }
      // remove proteins
      std::vector<ProteinHit> p_hit;
      prot_id.setHits(p_hit);
    }
    prot_ids.push_back(prot_id);

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    IdXMLFile().store(out, prot_ids, pep_ids);

    return EXECUTION_OK;
  }
Exemplo n.º 14
0
  void correct(PeakMap & exp, vector<DoubleReal> & deltaMZs, vector<DoubleReal> & mzs, vector<DoubleReal> & rts)
  {
    // load experiment and extract precursors
    vector<Precursor> precursors;  // precursor
    vector<double> precursors_rt;  // RT of precursor MS2 spectrum
    getPrecursors_(exp, precursors, precursors_rt);

    for (Size i = 0; i != precursors_rt.size(); ++i)
    {
      // get precursor rt
      DoubleReal rt = precursors_rt[i];

      // get precursor MZ
      DoubleReal mz = precursors[i].getMZ();

      //cout << rt << " " << mz << endl;

      // get precursor spectrum
      MSExperiment<Peak1D>::ConstIterator rt_it = exp.RTBegin(rt);

      // store index of MS2 spectrum
      UInt precursor_spectrum_idx = rt_it - exp.begin();

      // get parent (MS1) of precursor spectrum
      rt_it = exp.getPrecursorSpectrum(rt_it);

      if (rt_it->getMSLevel() != 1)
      {
        cout << "Error: no MS1 spectrum for this precursor" << endl;
      }

      //cout << rt_it->getRT() << " " << rt_it->size() << endl;

      // find peak (index) closest to expected position
      Size nearest_peak_idx = rt_it->findNearest(mz);

      // get actual position of closest peak
      DoubleReal nearest_peak_mz = (*rt_it)[nearest_peak_idx].getMZ();

      // calculate error between expected and actual position
      DoubleReal nearestPeakError = abs(nearest_peak_mz - mz);

      // check if error is small enough
      if (nearestPeakError < 0.1)
      {
        // sanity check: do we really have the same precursor in the original and the picked spectrum
        if (fabs(exp[precursor_spectrum_idx].getPrecursors()[0].getMZ() - mz) > 0.0001)
        {
          cout << "Error: index is referencing different precursors in original and picked spectrum." << endl;
        }

        // cout << mz << " -> " << nearest_peak_mz << endl;
        DoubleReal deltaMZ = nearest_peak_mz - mz;
        deltaMZs.push_back(deltaMZ);
        mzs.push_back(mz);
        rts.push_back(rt);
        // correct entries
        Precursor corrected_prec = precursors[i];
        corrected_prec.setMZ(nearest_peak_mz);
        exp[precursor_spectrum_idx].getPrecursors()[0] = corrected_prec;
      }
    }
  }
  ExitCodes
  main_(int, const char**)
  {
    //-------------------------------------------------------------
    // general variables and data
    //-------------------------------------------------------------
    FileHandler fh;
    vector<PeptideIdentification> peptide_identifications;
    vector<ProteinIdentification> protein_identifications;

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    const String in = getStringOption_("in");

    ProgressLogger logger;
    logger.setLogType(ProgressLogger::CMD);
    logger.startProgress(0, 1, "Loading...");

    if (File::isDirectory(in))
    {
      const String in_directory = File::absolutePath(in).ensureLastChar('/');
      const String mz_file = getStringOption_("mz_file");
      const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide");

      UInt i = 0;
      FileHandler fh;
      FileTypes::Type type;
      MSExperiment<Peak1D> msexperiment;
      // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;)  However, now String::toInt() might throw.
      map<Int, float> num_and_rt;
      vector<String> NativeID;

      // The mz-File (if given)
      if (!mz_file.empty())
      {
        type = fh.getTypeByFileName(mz_file);
        fh.loadExperiment(mz_file, msexperiment, type);

        for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it)
        {
          String(spectra_it->getNativeID()).split('=', NativeID);
          try
          {
            num_and_rt[NativeID[1].toInt()] = spectra_it->getRT();
            // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01
          }
          catch (Exception::ConversionError& e)
          {
            writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
          }
        }
      }

      // Get list of the actual Sequest .out-Files
      StringList in_files;
      if (!File::fileList(in_directory, String("*.out"), in_files))
      {
        writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!");
      }

      // Now get to work ...
      for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it)
      {
        vector<PeptideIdentification> peptide_ids_seq;
        ProteinIdentification protein_id_seq;
        vector<double> pvalues_seq;
        vector<String> in_file_vec;

        SequestOutfile sequest_outfile;

        writeDebug_(String("Reading file ") + *in_files_it, 3);

        try
        {
          sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide);

          in_files_it->split('.', in_file_vec);

          for (Size j = 0; j < peptide_ids_seq.size(); ++j)
          {

            // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files.
            peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i);

            Int scan_number = 0;
            if (!mz_file.empty())
            {
              try
              {
                scan_number = in_file_vec[2].toInt();
                peptide_ids_seq[j].setRT(num_and_rt[scan_number]);
              }
              catch (Exception::ConversionError& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
              }
              catch (exception& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.what());
              }
              //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? semantics of mz
              const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge();
              peptide_ids_seq[j].setMZ(real_mz);
            }

            writeDebug_(String("scan: ") + String(scan_number) + String("  RT: ") + String(peptide_ids_seq[j].getRT()) + "  MZ: " + String(peptide_ids_seq[j].getMZ()) + "  Ident: " + peptide_ids_seq[j].getIdentifier(), 4);

            peptide_identifications.push_back(peptide_ids_seq[j]);
          }

          protein_id_seq.setIdentifier(*in_files_it + "_" + i);
          protein_identifications.push_back(protein_id_seq);
          ++i;
        }
        catch (Exception::ParseError& pe)
        {
          writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")");
          throw;
        }
        catch (...)
        {
          writeLog_(String("Error reading file: ") + *in_files_it);
          throw;
        }
      }

      writeDebug_("All files processed.", 3);
    } // ! directory
    else
    {
      FileTypes::Type in_type = fh.getType(in);

      if (in_type == FileTypes::PEPXML)
      {
        String exp_name = getStringOption_("mz_file");
        String orig_name =  getStringOption_("mz_name");
        bool use_precursor_data = getFlag_("use_precursor_data");

        if (exp_name.empty())
        {
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, orig_name);
        }
        else
        {
          MSExperiment<> exp;
          fh.loadExperiment(exp_name, exp);
          if (!orig_name.empty())
          {
            exp_name = orig_name;
          }
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, exp_name, exp,
                            use_precursor_data);
        }
      }
      else if (in_type == FileTypes::IDXML)
      {
        IdXMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::MZIDENTML)
      {
        LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl;
        MzIdentMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::PROTXML)
      {
        protein_identifications.resize(1);
        peptide_identifications.resize(1);
        ProtXMLFile().load(in, protein_identifications[0],
                           peptide_identifications[0]);
      }
      else if (in_type == FileTypes::OMSSAXML)
      {
        protein_identifications.resize(1);
        OMSSAXMLFile().load(in, protein_identifications[0],
                            peptide_identifications, true);
      }
      else if (in_type == FileTypes::MASCOTXML)
      {
        String scan_regex = getStringOption_("scan_regex");
        String exp_name = getStringOption_("mz_file");
        MascotXMLFile::RTMapping rt_mapping;
        if (!exp_name.empty())
        {
          PeakMap exp;
          // load only MS2 spectra:
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);
        }
        protein_identifications.resize(1);
        MascotXMLFile().load(in, protein_identifications[0],
                             peptide_identifications, rt_mapping, scan_regex);
      }
      else if (in_type == FileTypes::XML)
      {
        ProteinIdentification protein_id;
        XTandemXMLFile().load(in, protein_id, peptide_identifications);
        protein_id.setSearchEngineVersion("");
        protein_id.setSearchEngine("XTandem");
        protein_identifications.push_back(protein_id);
        String exp_name = getStringOption_("mz_file");
        if (!exp_name.empty())
        {
          PeakMap exp;
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it)
          {
            UInt id = (Int)it->getMetaValue("spectrum_id");
            --id; // native IDs were written 1-based
            if (id < exp.size())
            {
              it->setRT(exp[id].getRT());
              double pre_mz(0.0);
              if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
              it->setMZ(pre_mz);
              it->removeMetaValue("spectrum_id");
            }
            else
            {
              LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" << endl;
            }
          }
        }
      }
      else
      {
        writeLog_("Unknown input file type given. Aborting!");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
    }
    logger.endProgress();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    const String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }
    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    logger.startProgress(0, 1, "Storing...");
    if (out_type == FileTypes::PEPXML)
    {
      bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed");
      String mz_file = getStringOption_("mz_file");
      String mz_name = getStringOption_("mz_name");
      PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed);
    }
    else if (out_type == FileTypes::IDXML)
    {
      IdXMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::MZIDENTML)
    {
      MzIdentMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::FASTA)
    {
      Size count = 0;
      ofstream fasta(out.c_str(), ios::out);
      for (Size i = 0; i < peptide_identifications.size(); ++i)
      {
        for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l)
        {
          const PeptideHit& hit = peptide_identifications[i].getHits()[l];
          fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++
                << "|" << hit.getSequence().toString() << endl;
          String seq = hit.getSequence().toUnmodifiedString();
          // FASTA files should have at most 60 characters of sequence info per line
          for (Size j = 0; j < seq.size(); j += 60)
          {
            Size k = min(j + 60, seq.size());
            fasta << string(seq[j], seq[k]) << endl;
          }
        }
      }
    }
    else
    {
      writeLog_("Unsupported output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }
    logger.endProgress();


    return EXECUTION_OK;
  }
Exemplo n.º 16
0
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in(getStringOption_("in"));
    String out(getStringOption_("out"));
    String pair_in(getStringOption_("pair_in"));
    String feature_out(getStringOption_("feature_out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));
    double RT_tolerance(getDoubleOption_("RT_tolerance"));
    double expansion_range(getDoubleOption_("expansion_range"));
    Size max_isotope(getIntOption_("max_isotope"));
    Int debug(getIntOption_("debug"));

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    PeakMap exp;
    MzMLFile().load(in, exp);
    exp.sortSpectra();
    exp.updateRanges();

    // read pair file
    ifstream is(pair_in.c_str());
    String line;
    vector<SILAC_pair> pairs;
    while (getline(is, line))
    {
      line.trim();
      if (line.empty() || line[0] == '#')
      {
        continue;
      }
      vector<String> split;
      line.split(' ', split);
      if (split.size() != 4)
      {
        cerr << "missformated line ('" << line << "') should be (space separated) 'm/z-light m/z-heavy charge rt'" << endl;
      }
      SILAC_pair p;
      p.mz_light = split[0].toDouble();
      p.mz_heavy = split[1].toDouble();
      p.charge = split[2].toInt();
      p.rt = split[3].toDouble();
      pairs.push_back(p);
    }
    is.close();

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------


    ConsensusMap results_map;
    results_map.getFileDescriptions()[0].label = "light";
    results_map.getFileDescriptions()[0].filename = in;
    results_map.getFileDescriptions()[1].label = "heavy";
    results_map.getFileDescriptions()[1].filename = in;

    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", 3);
    ff_param.setValue("intensity_threshold", -1.0);
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    vector<SILACQuantitation> quantlets;
    FeatureMap all_features;
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->size() == 0 || it->getMSLevel() != 1 || !it->getInstrumentSettings().getZoomScan())
      {
        continue;
      }

      PeakSpectrum new_spec = *it;

      // get spacing from data
      double min_spacing(numeric_limits<double>::max());
      double last_mz(0);
      for (PeakSpectrum::ConstIterator pit = new_spec.begin(); pit != new_spec.end(); ++pit)
      {
        if (pit->getMZ() - last_mz < min_spacing)
        {
          min_spacing = pit->getMZ() - last_mz;
        }
        last_mz = pit->getMZ();
      }
      writeDebug_("Min-spacing=" + String(min_spacing), 1);

      // split the spectrum into two subspectra, by using different hypothesis of
      // the SILAC pairs
      Size idx = 0;
      for (vector<SILAC_pair>::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit, ++idx)
      {
        // in RT window?
        if (fabs(it->getRT() - pit->rt) >= RT_tolerance)
        {
          continue;
        }

        // now excise the two ranges for the pair, complete isotope distributions of both, light and heavy
        PeakSpectrum light_spec, heavy_spec;
        light_spec.setRT(it->getRT());
        heavy_spec.setRT(it->getRT());
        for (PeakSpectrum::ConstIterator sit = it->begin(); sit != it->end(); ++sit)
        {
          double mz(sit->getMZ());
          if (mz - (pit->mz_light - precursor_mass_tolerance) > 0 &&
              (pit->mz_light + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz  > 0)
          {
            light_spec.push_back(*sit);
          }

          if (mz - (pit->mz_heavy - precursor_mass_tolerance) > 0 &&
              (pit->mz_heavy + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz  > 0)
          {
            heavy_spec.push_back(*sit);
          }
        }

        // expand light spectrum
        Peak1D p;
        p.setIntensity(0);

        if (light_spec.size() > 0)
        {
          double lower_border = light_spec.begin()->getMZ() - expansion_range;
          for (double pos = light_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing)
          {
            p.setMZ(pos);
            light_spec.insert(light_spec.begin(), p);
          }

          double upper_border = light_spec.begin()->getMZ() - expansion_range;
          for (double pos = light_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing)
          {
            p.setMZ(pos);
            light_spec.push_back(p);
          }
        }

        if (heavy_spec.size() > 0)
        {
          // expand heavy spectrum
          double lower_border = heavy_spec.begin()->getMZ() - expansion_range;
          for (double pos = heavy_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing)
          {
            p.setMZ(pos);
            heavy_spec.insert(heavy_spec.begin(), p);
          }

          double upper_border = heavy_spec.begin()->getMZ() - expansion_range;
          for (double pos = heavy_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing)
          {
            p.setMZ(pos);
            heavy_spec.push_back(p);
          }
        }

        // create experiments for feature finding
        PeakMap new_exp_light, new_exp_heavy;
        new_exp_light.addSpectrum(light_spec);
        new_exp_heavy.addSpectrum(heavy_spec);

        if (debug > 9)
        {
          MzMLFile().store(String(it->getRT()) + "_debugging_light.mzML", new_exp_light);
          MzMLFile().store(String(it->getRT()) + "_debugging_heavy.mzML", new_exp_heavy);
        }

        writeDebug_("Spectrum-id: " + it->getNativeID() + " @ " + String(it->getRT()) + "s", 1);

        new_exp_light.updateRanges();
        new_exp_heavy.updateRanges();

        FeatureMap feature_map_light, feature_map_heavy, seeds;
        if (light_spec.size() > 0)
        {
          ff.run("isotope_wavelet", new_exp_light, feature_map_light, ff_param, seeds);
        }
        writeDebug_("#light_features=" + String(feature_map_light.size()), 1);
        if (heavy_spec.size() > 0)
        {
          ff.run("isotope_wavelet", new_exp_heavy, feature_map_heavy, ff_param, seeds);
        }
        writeDebug_("#heavy_features=" + String(feature_map_heavy.size()), 1);

        // search if feature maps to m/z value of pair
        vector<MatchedFeature> light, heavy;
        for (FeatureMap::const_iterator fit = feature_map_light.begin(); fit != feature_map_light.end(); ++fit)
        {
          all_features.push_back(*fit);
          light.push_back(MatchedFeature(*fit, idx));
        }
        for (FeatureMap::const_iterator fit = feature_map_heavy.begin(); fit != feature_map_heavy.end(); ++fit)
        {
          all_features.push_back(*fit);
          heavy.push_back(MatchedFeature(*fit, idx));
        }

        if (!heavy.empty() && !light.empty())
        {
          writeDebug_("Finding best feature pair out of " + String(light.size()) + " light and " + String(heavy.size()) + " heavy matching features.", 1);
          // now find "good" matches, means the pair with the smallest m/z deviation
          Feature best_light, best_heavy;
          double best_deviation(numeric_limits<double>::max());
          Size best_idx(pairs.size());
          for (vector<MatchedFeature>::const_iterator fit1 = light.begin(); fit1 != light.end(); ++fit1)
          {
            for (vector<MatchedFeature>::const_iterator fit2 = heavy.begin(); fit2 != heavy.end(); ++fit2)
            {
              if (fit1->idx != fit2->idx || fit1->f.getCharge() != fit2->f.getCharge() ||
                  fabs(fit1->f.getMZ() - pairs[fit1->idx].mz_light) > precursor_mass_tolerance ||
                  fabs(fit2->f.getMZ() - pairs[fit2->idx].mz_heavy) > precursor_mass_tolerance)
              {
                continue;
              }
              double deviation(0);
              deviation = fabs((fit1->f.getMZ() - pairs[fit1->idx].mz_light) - (fit2->f.getMZ() - pairs[fit2->idx].mz_heavy));
              if (deviation < best_deviation && deviation < precursor_mass_tolerance)
              {
                best_light = fit1->f;
                best_heavy = fit2->f;
                best_idx = fit1->idx;
              }
            }
          }

          if (best_idx == pairs.size())
          {
            continue;
          }

          writeDebug_("Ratio: " + String(best_heavy.getIntensity() / best_light.getIntensity()), 1);
          ConsensusFeature SILAC_feature;
          SILAC_feature.setMZ((best_light.getMZ() + best_heavy.getMZ()) / 2.0);
          SILAC_feature.setRT((best_light.getRT() + best_heavy.getRT()) / 2.0);
          SILAC_feature.insert(0, best_light);
          SILAC_feature.insert(1, best_heavy);
          results_map.push_back(SILAC_feature);
          quantlets.push_back(SILACQuantitation(best_light.getIntensity(), best_heavy.getIntensity(), best_idx));
        }
      }
    }

    // now calculate the final quantitation values from the quantlets
    Map<Size, vector<SILACQuantitation> > idx_to_quantlet;
    for (vector<SILACQuantitation>::const_iterator it = quantlets.begin(); it != quantlets.end(); ++it)
    {
      idx_to_quantlet[it->idx].push_back(*it);
    }

    for (Map<Size, vector<SILACQuantitation> >::ConstIterator it1 = idx_to_quantlet.begin(); it1 != idx_to_quantlet.end(); ++it1)
    {
      SILAC_pair silac_pair = pairs[it1->first];

      // simply add up all intensities and calculate the final ratio
      double light_sum(0), heavy_sum(0);
      vector<double> light_ints, heavy_ints, ratios;
      for (vector<SILACQuantitation>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
      {
        light_sum += it2->light_intensity;
        light_ints.push_back(it2->light_intensity);
        heavy_sum += it2->heavy_intensity;
        heavy_ints.push_back(it2->heavy_intensity);
        ratios.push_back(it2->heavy_intensity / it2->light_intensity * (it2->heavy_intensity + it2->light_intensity));
      }

      double absdev_ratios = Math::absdev(ratios.begin(), ratios.begin() + (ratios.size()) / (heavy_sum + light_sum));
      cout << "Ratio: " << silac_pair.mz_light << " <-> " << silac_pair.mz_heavy << " @ " << silac_pair.rt << " s, ratio(h/l) " << heavy_sum / light_sum << " +/- " << absdev_ratios << " (#scans for quantation: " << String(it1->second.size()) << " )" << endl;
    }


    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    if (feature_out != "")
    {
      FeatureXMLFile().store(feature_out, all_features);
    }
    writeDebug_("Writing output", 1);
    ConsensusXMLFile().store(out, results_map);

    return EXECUTION_OK;
  }
Exemplo n.º 17
0
START_SECTION(virtual ~MascotGenericFile())
{
  delete ptr;
}
END_SECTION

ptr = new MascotGenericFile();

START_SECTION((template < typename MapType > void load(const String &filename, MapType &exp)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);
  TEST_EQUAL(exp.size(), 1)

  TEST_EQUAL(exp.begin()->size(), 9)
}
END_SECTION

START_SECTION((void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact = false)))
{
  PeakMap exp;
  ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp);
  
  // handling of modifications:
  Param params = ptr->getParameters();
  params.setValue("fixed_modifications", ListUtils::create<String>("Carbamidomethyl (C),Phospho (S)"));
  params.setValue("variable_modifications", ListUtils::create<String>("Oxidation (M),Deamidated (N),Deamidated (Q)"));
  ptr->setParameters(params);

  stringstream ss;
Exemplo n.º 18
0
  void FeatureFinder::run(const String& algorithm_name, PeakMap& input_map, FeatureMap& features, const Param& param, const FeatureMap& seeds)
  {
    // Nothing to do if there is no data
    if ((algorithm_name != "mrm" && input_map.empty()) || (algorithm_name == "mrm" && input_map.getChromatograms().empty()))
    {
      features.clear(true);
      return;
    }

    // check input
    {
      // We need updated ranges => check number of peaks
      if (algorithm_name != "mrm" && input_map.getSize() == 0)
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder needs updated ranges on input map. Aborting.");
      }

      // We need MS1 data only => check levels
      if (algorithm_name != "mrm" && (input_map.getMSLevels().size() != 1 || input_map.getMSLevels()[0] != 1))
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data. Aborting.");
      }

      //Check if the peaks are sorted according to m/z
      if (!input_map.isSorted(true))
      {
        LOG_WARN << "Input map is not sorted by RT and m/z! This is done now, before applying the algorithm!" << std::endl;
        input_map.sortSpectra(true);
        input_map.sortChromatograms(true);
      }
      for (Size s = 0; s < input_map.size(); ++s)
      {
        if (input_map[s].empty())
          continue;
        if (input_map[s][0].getMZ() < 0)
        {
          throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on spectra that contain peaks with positive m/z values. Filter the data accordingly beforehand! Aborting.");
        }
      }
    }

    // initialize
    if (algorithm_name != "mrm" && algorithm_name != "centroided")
    {
      // Resize peak flag vector
      flags_.resize(input_map.size());
      for (Size i = 0; i < input_map.size(); ++i)
      {
        flags_[i].assign(input_map[i].size(), UNUSED);
      }
    }

    // do the work
    if (algorithm_name != "none")
    {
      FeatureFinderAlgorithm* algorithm = Factory<FeatureFinderAlgorithm>::create(algorithm_name);
      algorithm->setParameters(param);
      algorithm->setData(input_map, features, *this);
      algorithm->setSeeds(seeds);
      algorithm->run();
      delete(algorithm);
    }

    if (algorithm_name != "mrm") // mrm  works on chromatograms; the next section is only for conventional data
    {
      //report RT apex spectrum index and native ID for each feature
      for (Size i = 0; i < features.size(); ++i)
      {
        //index
        Size spectrum_index = input_map.RTBegin(features[i].getRT()) - input_map.begin();
        features[i].setMetaValue("spectrum_index", spectrum_index);
        //native id
        if (spectrum_index < input_map.size())
        {
          String native_id = input_map[spectrum_index].getNativeID();
          features[i].setMetaValue("spectrum_native_id", native_id);
        }
        else
        {
          /// @todo that happens sometimes using IsotopeWaveletFeatureFinder (Rene, Marc, Andreas, Clemens)
          std::cerr << "FeatureFinderAlgorithm_impl, line=" << __LINE__ << "; FixMe this cannot be, but happens" << std::endl;
        }
      }
    }
  }
Exemplo n.º 19
0
	
	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5)

	e_ptr->filterSpectrum(spec);
	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION

START_SECTION((void filterPeakMap(PeakMap& exp)))
	DTAFile dta_file;
  PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	PeakMap pm;
	pm.addSpectrum(spec);

	TEST_REAL_SIMILAR((pm.begin()->begin() + 40)->getIntensity(), 37.5)

	e_ptr->filterPeakMap(pm);
	TEST_REAL_SIMILAR((pm.begin()->begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION

START_SECTION((void filterPeakSpectrum(PeakSpectrum& spectrum)))
	DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5)

	e_ptr->filterPeakSpectrum(spec);
	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION
Exemplo n.º 20
0
  ExitCodes main_(int, const char**)
  {
    // instance specific location of settings in INI file (e.g. 'TOPP_Skeleton:1:')
    String ini_location;
    // path to the log file
    String logfile(getStringOption_("log"));
    String xtandem_executable(getStringOption_("xtandem_executable"));
    String inputfile_name;
    String outputfile_name;

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    inputfile_name = getStringOption_("in");
    writeDebug_(String("Input file: ") + inputfile_name, 1);
    if (inputfile_name == "")
    {
      writeLog_("No input file specified. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    outputfile_name = getStringOption_("out");
    writeDebug_(String("Output file: ") + outputfile_name, 1);
    if (outputfile_name == "")
    {
      writeLog_("No output file specified. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    // write input xml file
    String temp_directory = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files
    {
      QDir d;
      d.mkpath(temp_directory.toQString());
    }

    String input_filename(temp_directory + "_tandem_input_file.xml");
    String tandem_input_filename(temp_directory + "_tandem_input_file.mzData");
    String tandem_output_filename(temp_directory + "_tandem_output_file.xml");
    String tandem_taxonomy_filename(temp_directory + "_tandem_taxonomy_file.xml");

    //-------------------------------------------------------------
    // Validate user parameters
    //-------------------------------------------------------------
    if (getIntOption_("min_precursor_charge") > getIntOption_("max_precursor_charge"))
    {
      LOG_ERROR << "Given charge range is invalid: max_precursor_charge needs to be >= min_precursor_charge." << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    String db_name(getStringOption_("database"));
    if (!File::readable(db_name))
    {
      String full_db_name;
      try
      {
        full_db_name = File::findDatabase(db_name);
      }
      catch (...)
      {
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
      db_name = full_db_name;
    }


    PeakMap exp;
    MzMLFile mzml_file;
    mzml_file.getOptions().addMSLevel(2); // only load msLevel 2
    mzml_file.setLogType(log_type_);
    mzml_file.load(inputfile_name, exp);

    if (exp.getSpectra().empty())
    {
      throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file.");
    }

    // determine type of spectral data (profile or centroided)
    SpectrumSettings::SpectrumType spectrum_type = exp[0].getType();

    if (spectrum_type == SpectrumSettings::RAWDATA)
    {
      if (!getFlag_("force"))
      {
        throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag.");
      }
    }

    // we need to replace the native id with a simple numbering schema, to be able to
    // map the IDs back to the spectra (RT, and MZ information)
    Size native_id(0);
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      it->setNativeID(++native_id);
    }

    // We store the file in mzData file format, because MGF files somehow produce in most
    // of the cases IDs with charge 2+. We do not use the input file directly
    // because XTandem sometimes stumbles over misleading substrings in the filename,
    // e.g. mzXML ...
    MzDataFile mzdata_outfile;
    mzdata_outfile.store(tandem_input_filename, exp);

    XTandemInfile infile;
    infile.setInputFilename(tandem_input_filename);
    infile.setOutputFilename(tandem_output_filename);

    ofstream tax_out(tandem_taxonomy_filename.c_str());
    tax_out << "<?xml version=\"1.0\"?>" << "\n";
    tax_out << "\t<bioml label=\"x! taxon-to-file matching list\">" << "\n";
    tax_out << "\t\t<taxon label=\"OpenMS_dummy_taxonomy\">" << "\n";
    tax_out << "\t\t\t<file format=\"peptide\" URL=\"" << db_name << "\" />" << "\n";
    tax_out << "\t</taxon>" << "\n";
    tax_out << "</bioml>" << "\n";
    tax_out.close();

    infile.setTaxonomyFilename(tandem_taxonomy_filename);

    if (getStringOption_("precursor_error_units") == "Da")
    {
      infile.setPrecursorMassErrorUnit(XTandemInfile::DALTONS);
    }
    else
    {
      infile.setPrecursorMassErrorUnit(XTandemInfile::PPM);
    }

    if (getStringOption_("fragment_error_units") == "Da")
    {
      infile.setFragmentMassErrorUnit(XTandemInfile::DALTONS);
    }
    else
    {
      infile.setFragmentMassErrorUnit(XTandemInfile::PPM);
    }

    if (getStringOption_("default_input_file") != "")
    {
      infile.load(getStringOption_("default_input_file"));
      infile.setDefaultParametersFilename(getStringOption_("default_input_file"));
    }
    else
    {
      String default_file = File::find("CHEMISTRY/XTandem_default_input.xml");
      infile.load(default_file);
      infile.setDefaultParametersFilename(default_file);
    }

    infile.setPrecursorMassTolerancePlus(getDoubleOption_("precursor_mass_tolerance"));
    infile.setPrecursorMassToleranceMinus(getDoubleOption_("precursor_mass_tolerance"));
    infile.setFragmentMassTolerance(getDoubleOption_("fragment_mass_tolerance"));
    infile.setMaxPrecursorCharge(getIntOption_("max_precursor_charge"));
    infile.setNumberOfThreads(getIntOption_("threads"));
    infile.setModifications(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications")));
    infile.setTaxon("OpenMS_dummy_taxonomy");
    infile.setOutputResults(getStringOption_("output_results"));
    infile.setMaxValidEValue(getDoubleOption_("max_valid_expect"));
    infile.setCleavageSite(getStringOption_("cleavage_site"));
    infile.setNumberOfMissedCleavages(getIntOption_("missed_cleavages"));
    infile.setRefine(getFlag_("refinement"));
    infile.setSemiCleavage(getFlag_("semi_cleavage"));
    bool allow_isotope_error = getStringOption_("allow_isotope_error") == "yes" ? true : false;
    infile.setAllowIsotopeError(allow_isotope_error);

    infile.write(input_filename);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    int status = QProcess::execute(xtandem_executable.toQString(), QStringList(input_filename.toQString())); // does automatic escaping etc...
    if (status != 0)
    {
      writeLog_("XTandem problem. Aborting! Calling command was: '" + xtandem_executable + " \"" + input_filename + "\"'.\nDoes the !XTandem executable exist?");
      // clean temporary files
      if (this->debug_level_ < 2)
      {
        File::removeDirRecursively(temp_directory);
        LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl;
      }
      else
      {
        LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl;
      }
      return EXTERNAL_PROGRAM_ERROR;
    }

    vector<ProteinIdentification> protein_ids;
    ProteinIdentification protein_id;
    vector<PeptideIdentification> peptide_ids;

    // read the output of X!Tandem and write it to idXML
    XTandemXMLFile tandem_output;
    tandem_output.setModificationDefinitionsSet(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications")));
    // find the file, because XTandem extends the filename with a timestamp we do not know (exactly)
    StringList files;
    File::fileList(temp_directory, "_tandem_output_file*.xml", files);
    if (files.size() != 1)
    {
      throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, tandem_output_filename);
    }
    tandem_output.load(temp_directory + files[0], protein_id, peptide_ids);

    // now put the RTs into the peptide_ids from the spectrum ids
    for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it != peptide_ids.end(); ++it)
    {
      UInt id = (Int)it->getMetaValue("spectrum_id");
      --id; // native IDs were written 1-based
      if (id < exp.size())
      {
        it->setRT(exp[id].getRT());
        double pre_mz(0.0);
        if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
        it->setMZ(pre_mz);
        //it->removeMetaValue("spectrum_id");
      }
      else
      {
        LOG_ERROR << "XTandemAdapter: Error: id '" << id << "' not found in peak map!" << endl;
      }
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // handle the search parameters
    ProteinIdentification::SearchParameters search_parameters;
    search_parameters.db = getStringOption_("database");
    search_parameters.charges = "+" + String(getIntOption_("min_precursor_charge")) + "-+" + String(getIntOption_("max_precursor_charge"));

    ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC;
    search_parameters.mass_type = mass_type;
    search_parameters.fixed_modifications = getStringList_("fixed_modifications");
    search_parameters.variable_modifications = getStringList_("variable_modifications");
    search_parameters.missed_cleavages = getIntOption_("missed_cleavages");
    search_parameters.peak_mass_tolerance = getDoubleOption_("fragment_mass_tolerance");
    search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance");

    protein_id.setSearchParameters(search_parameters);
    protein_id.setSearchEngineVersion("");
    protein_id.setSearchEngine("XTandem");

    protein_ids.push_back(protein_id);

    IdXMLFile().store(outputfile_name, protein_ids, peptide_ids);

    /// Deletion of temporary files
    if (this->debug_level_ < 2)
    {
      File::removeDirRecursively(temp_directory);
      LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl;
    }
    else
    {
      LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl;
    }

    // some stats
    LOG_INFO << "Statistics:\n"
             << "  identified MS2 spectra: " << peptide_ids.size() << " / " << exp.size() << " = " << int(peptide_ids.size() * 100.0 / exp.size()) << "% (with e-value < " << String(getDoubleOption_("max_valid_expect")) << ")" << std::endl;

    return EXECUTION_OK;
  }