void CompNovoIdentificationBase::getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, DoubleReal prefix, DoubleReal suffix)
  {
    static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
    Peak1D p;
    DoubleReal b_pos(0.0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);
    for (Size i = 0; i != sequence.size() - 1; ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[sequence.size() - i - 1]);
      y_pos += aa_to_weight_[aa2];

      if (b_pos > min_mz_ && b_pos < max_mz_)
      {
        p.setPosition(b_pos + Constants::PROTON_MASS_U);
        p.setIntensity(1.0f);
        spec.push_back(p);
      }

      if (y_pos > min_mz_ && y_pos < max_mz_)
      {
        p.setPosition(y_pos + Constants::PROTON_MASS_U);
        p.setIntensity(1.0f);
        spec.push_back(p);
      }
    }

    spec.sortByPosition();
    return;
  }
Exemplo n.º 2
0
void OpenSwathDataAccessHelper::convertToOpenMSSpectrum(const OpenSwath::SpectrumPtr sptr, OpenMS::MSSpectrum<> & spectrum)
{
    // recreate a spectrum from the data arrays!
    OpenSwath::BinaryDataArrayPtr mz_arr = sptr->getMZArray();
    OpenSwath::BinaryDataArrayPtr int_arr = sptr->getIntensityArray();
    spectrum.reserve(mz_arr->data.size());
    for (Size i = 0; i < mz_arr->data.size(); i++)
    {
        Peak1D p;
        p.setMZ(mz_arr->data[i]);
        p.setIntensity(int_arr->data[i]);
        spectrum.push_back(p);
    }
}
void getSwathFile(PeakMap& exp, int nr_swathes=32, bool ms1=true)
{
  if (ms1)
  {
    MSSpectrum s;
    s.setMSLevel(1);
    Peak1D p; p.setMZ(100); p.setIntensity(200);
    s.push_back(p);
    exp.addSpectrum(s);
  }
  for (int i = 0; i< nr_swathes; i++)
  {
    MSSpectrum s;
    s.setMSLevel(2);
    std::vector<Precursor> prec(1);
    prec[0].setIsolationWindowLowerOffset(12.5);
    prec[0].setIsolationWindowUpperOffset(12.5);
    prec[0].setMZ(400 + i*25 + 12.5);
    s.setPrecursors(prec);
    Peak1D p; p.setMZ(101 + i); p.setIntensity(201 + i);
    s.push_back(p);
    exp.addSpectrum(s);
  }
}
Exemplo n.º 4
0
bool IDEvaluationBase::getPoints(std::vector<PeptideIdentification>& peptides /* cannot be const, to avoid copy */,
                                 const std::vector<double>& q_value_thresholds, MSSpectrum<>& points)
{
    points.clear(true);

    FalseDiscoveryRate fdr;
    fdr.setParameters(param_.copy("fdr:", true));
    try
    {
        fdr.apply(peptides); // computes a q-value (if its params are correct)
    }
    catch (Exception::MissingInformation)
    {
        LOG_FATAL_ERROR << "Tool failed due to missing information (see above)." << std::endl;
        return false;
    }

    // get list of q-values and sort them
    std::vector<double> q_values;
    q_values.reserve(peptides.size());
    for (vector<PeptideIdentification>::iterator it = peptides.begin(); it != peptides.end(); ++it)
    {
        it->assignRanks();
        if (it->getHits().size() > 0)
            q_values.push_back(it->getHits()[0].getScore());
    }
    std::sort(q_values.begin(), q_values.end());


    for (Size i = 0; i < q_value_thresholds.size(); ++i)
    {
        // get position in sorted q-values where cutoff is reached
        std::vector<double>::iterator pos = std::upper_bound(q_values.begin(), q_values.end(), q_value_thresholds[i]);
        Peak1D p;
        p.setMZ(q_value_thresholds[i] * 100);
        p.setIntensity(std::distance(q_values.begin(), pos));
        points.push_back(p);
    }

    return true;
}
Exemplo n.º 5
0
  void PeakPickerHiRes::pick(const MSChromatogram& input, MSChromatogram& output, std::vector<PeakBoundary>& boundaries) const
  {
    // copy meta data of the input chromatogram
    output.clear(true);
    output.ChromatogramSettings::operator=(input);
    output.MetaInfoInterface::operator=(input);
    output.setName(input.getName());

    MSSpectrum input_spectrum;
    MSSpectrum output_spectrum;
    for (MSChromatogram::const_iterator it = input.begin(); it != input.end(); ++it)
    {
      Peak1D p;
      p.setMZ(it->getRT());
      p.setIntensity(it->getIntensity());
      input_spectrum.push_back(p);
    }

    pick(input_spectrum, output_spectrum, boundaries, false); // no spacing checks!

    for (MSSpectrum::const_iterator it = output_spectrum.begin(); it != output_spectrum.end(); ++it)
    {
      ChromatogramPeak p;
      p.setRT(it->getMZ());
      p.setIntensity(it->getIntensity());
      output.push_back(p);
    }

    // copy float data arrays (for FWHM)
    output.getFloatDataArrays().resize(output_spectrum.getFloatDataArrays().size());
    for (Size i = 0; i < output_spectrum.getFloatDataArrays().size(); ++i)
    {
      output.getFloatDataArrays()[i].insert(output.getFloatDataArrays()[i].begin(), output_spectrum.getFloatDataArrays()[i].begin(), output_spectrum.getFloatDataArrays()[i].end());
      output.getFloatDataArrays()[i].setName(output_spectrum.getFloatDataArrays()[i].getName());
    }
  }
Exemplo n.º 6
0
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in(getStringOption_("in"));
    String out(getStringOption_("out"));
    String pair_in(getStringOption_("pair_in"));
    String feature_out(getStringOption_("feature_out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));
    double RT_tolerance(getDoubleOption_("RT_tolerance"));
    double expansion_range(getDoubleOption_("expansion_range"));
    Size max_isotope(getIntOption_("max_isotope"));
    Int debug(getIntOption_("debug"));

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    PeakMap exp;
    MzMLFile().load(in, exp);
    exp.sortSpectra();
    exp.updateRanges();

    // read pair file
    ifstream is(pair_in.c_str());
    String line;
    vector<SILAC_pair> pairs;
    while (getline(is, line))
    {
      line.trim();
      if (line.empty() || line[0] == '#')
      {
        continue;
      }
      vector<String> split;
      line.split(' ', split);
      if (split.size() != 4)
      {
        cerr << "missformated line ('" << line << "') should be (space separated) 'm/z-light m/z-heavy charge rt'" << endl;
      }
      SILAC_pair p;
      p.mz_light = split[0].toDouble();
      p.mz_heavy = split[1].toDouble();
      p.charge = split[2].toInt();
      p.rt = split[3].toDouble();
      pairs.push_back(p);
    }
    is.close();

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------


    ConsensusMap results_map;
    results_map.getFileDescriptions()[0].label = "light";
    results_map.getFileDescriptions()[0].filename = in;
    results_map.getFileDescriptions()[1].label = "heavy";
    results_map.getFileDescriptions()[1].filename = in;

    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", 3);
    ff_param.setValue("intensity_threshold", -1.0);
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    vector<SILACQuantitation> quantlets;
    FeatureMap all_features;
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->size() == 0 || it->getMSLevel() != 1 || !it->getInstrumentSettings().getZoomScan())
      {
        continue;
      }

      PeakSpectrum new_spec = *it;

      // get spacing from data
      double min_spacing(numeric_limits<double>::max());
      double last_mz(0);
      for (PeakSpectrum::ConstIterator pit = new_spec.begin(); pit != new_spec.end(); ++pit)
      {
        if (pit->getMZ() - last_mz < min_spacing)
        {
          min_spacing = pit->getMZ() - last_mz;
        }
        last_mz = pit->getMZ();
      }
      writeDebug_("Min-spacing=" + String(min_spacing), 1);

      // split the spectrum into two subspectra, by using different hypothesis of
      // the SILAC pairs
      Size idx = 0;
      for (vector<SILAC_pair>::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit, ++idx)
      {
        // in RT window?
        if (fabs(it->getRT() - pit->rt) >= RT_tolerance)
        {
          continue;
        }

        // now excise the two ranges for the pair, complete isotope distributions of both, light and heavy
        PeakSpectrum light_spec, heavy_spec;
        light_spec.setRT(it->getRT());
        heavy_spec.setRT(it->getRT());
        for (PeakSpectrum::ConstIterator sit = it->begin(); sit != it->end(); ++sit)
        {
          double mz(sit->getMZ());
          if (mz - (pit->mz_light - precursor_mass_tolerance) > 0 &&
              (pit->mz_light + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz  > 0)
          {
            light_spec.push_back(*sit);
          }

          if (mz - (pit->mz_heavy - precursor_mass_tolerance) > 0 &&
              (pit->mz_heavy + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz  > 0)
          {
            heavy_spec.push_back(*sit);
          }
        }

        // expand light spectrum
        Peak1D p;
        p.setIntensity(0);

        if (light_spec.size() > 0)
        {
          double lower_border = light_spec.begin()->getMZ() - expansion_range;
          for (double pos = light_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing)
          {
            p.setMZ(pos);
            light_spec.insert(light_spec.begin(), p);
          }

          double upper_border = light_spec.begin()->getMZ() - expansion_range;
          for (double pos = light_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing)
          {
            p.setMZ(pos);
            light_spec.push_back(p);
          }
        }

        if (heavy_spec.size() > 0)
        {
          // expand heavy spectrum
          double lower_border = heavy_spec.begin()->getMZ() - expansion_range;
          for (double pos = heavy_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing)
          {
            p.setMZ(pos);
            heavy_spec.insert(heavy_spec.begin(), p);
          }

          double upper_border = heavy_spec.begin()->getMZ() - expansion_range;
          for (double pos = heavy_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing)
          {
            p.setMZ(pos);
            heavy_spec.push_back(p);
          }
        }

        // create experiments for feature finding
        PeakMap new_exp_light, new_exp_heavy;
        new_exp_light.addSpectrum(light_spec);
        new_exp_heavy.addSpectrum(heavy_spec);

        if (debug > 9)
        {
          MzMLFile().store(String(it->getRT()) + "_debugging_light.mzML", new_exp_light);
          MzMLFile().store(String(it->getRT()) + "_debugging_heavy.mzML", new_exp_heavy);
        }

        writeDebug_("Spectrum-id: " + it->getNativeID() + " @ " + String(it->getRT()) + "s", 1);

        new_exp_light.updateRanges();
        new_exp_heavy.updateRanges();

        FeatureMap feature_map_light, feature_map_heavy, seeds;
        if (light_spec.size() > 0)
        {
          ff.run("isotope_wavelet", new_exp_light, feature_map_light, ff_param, seeds);
        }
        writeDebug_("#light_features=" + String(feature_map_light.size()), 1);
        if (heavy_spec.size() > 0)
        {
          ff.run("isotope_wavelet", new_exp_heavy, feature_map_heavy, ff_param, seeds);
        }
        writeDebug_("#heavy_features=" + String(feature_map_heavy.size()), 1);

        // search if feature maps to m/z value of pair
        vector<MatchedFeature> light, heavy;
        for (FeatureMap::const_iterator fit = feature_map_light.begin(); fit != feature_map_light.end(); ++fit)
        {
          all_features.push_back(*fit);
          light.push_back(MatchedFeature(*fit, idx));
        }
        for (FeatureMap::const_iterator fit = feature_map_heavy.begin(); fit != feature_map_heavy.end(); ++fit)
        {
          all_features.push_back(*fit);
          heavy.push_back(MatchedFeature(*fit, idx));
        }

        if (!heavy.empty() && !light.empty())
        {
          writeDebug_("Finding best feature pair out of " + String(light.size()) + " light and " + String(heavy.size()) + " heavy matching features.", 1);
          // now find "good" matches, means the pair with the smallest m/z deviation
          Feature best_light, best_heavy;
          double best_deviation(numeric_limits<double>::max());
          Size best_idx(pairs.size());
          for (vector<MatchedFeature>::const_iterator fit1 = light.begin(); fit1 != light.end(); ++fit1)
          {
            for (vector<MatchedFeature>::const_iterator fit2 = heavy.begin(); fit2 != heavy.end(); ++fit2)
            {
              if (fit1->idx != fit2->idx || fit1->f.getCharge() != fit2->f.getCharge() ||
                  fabs(fit1->f.getMZ() - pairs[fit1->idx].mz_light) > precursor_mass_tolerance ||
                  fabs(fit2->f.getMZ() - pairs[fit2->idx].mz_heavy) > precursor_mass_tolerance)
              {
                continue;
              }
              double deviation(0);
              deviation = fabs((fit1->f.getMZ() - pairs[fit1->idx].mz_light) - (fit2->f.getMZ() - pairs[fit2->idx].mz_heavy));
              if (deviation < best_deviation && deviation < precursor_mass_tolerance)
              {
                best_light = fit1->f;
                best_heavy = fit2->f;
                best_idx = fit1->idx;
              }
            }
          }

          if (best_idx == pairs.size())
          {
            continue;
          }

          writeDebug_("Ratio: " + String(best_heavy.getIntensity() / best_light.getIntensity()), 1);
          ConsensusFeature SILAC_feature;
          SILAC_feature.setMZ((best_light.getMZ() + best_heavy.getMZ()) / 2.0);
          SILAC_feature.setRT((best_light.getRT() + best_heavy.getRT()) / 2.0);
          SILAC_feature.insert(0, best_light);
          SILAC_feature.insert(1, best_heavy);
          results_map.push_back(SILAC_feature);
          quantlets.push_back(SILACQuantitation(best_light.getIntensity(), best_heavy.getIntensity(), best_idx));
        }
      }
    }

    // now calculate the final quantitation values from the quantlets
    Map<Size, vector<SILACQuantitation> > idx_to_quantlet;
    for (vector<SILACQuantitation>::const_iterator it = quantlets.begin(); it != quantlets.end(); ++it)
    {
      idx_to_quantlet[it->idx].push_back(*it);
    }

    for (Map<Size, vector<SILACQuantitation> >::ConstIterator it1 = idx_to_quantlet.begin(); it1 != idx_to_quantlet.end(); ++it1)
    {
      SILAC_pair silac_pair = pairs[it1->first];

      // simply add up all intensities and calculate the final ratio
      double light_sum(0), heavy_sum(0);
      vector<double> light_ints, heavy_ints, ratios;
      for (vector<SILACQuantitation>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2)
      {
        light_sum += it2->light_intensity;
        light_ints.push_back(it2->light_intensity);
        heavy_sum += it2->heavy_intensity;
        heavy_ints.push_back(it2->heavy_intensity);
        ratios.push_back(it2->heavy_intensity / it2->light_intensity * (it2->heavy_intensity + it2->light_intensity));
      }

      double absdev_ratios = Math::absdev(ratios.begin(), ratios.begin() + (ratios.size()) / (heavy_sum + light_sum));
      cout << "Ratio: " << silac_pair.mz_light << " <-> " << silac_pair.mz_heavy << " @ " << silac_pair.rt << " s, ratio(h/l) " << heavy_sum / light_sum << " +/- " << absdev_ratios << " (#scans for quantation: " << String(it1->second.size()) << " )" << endl;
    }


    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    if (feature_out != "")
    {
      FeatureXMLFile().store(feature_out, all_features);
    }
    writeDebug_("Writing output", 1);
    ConsensusXMLFile().store(out, results_map);

    return EXECUTION_OK;
  }
Exemplo n.º 7
0
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    StringList in = getStringList_("in");
    String edta = getStringOption_("pos");
    String out = getStringOption_("out");
    String out_sep = getStringOption_("out_separator");
    String out_TIC_debug = getStringOption_("auto_rt:out_debug_TIC");

    StringList in_header = getStringList_("in_header");


    // number of out_debug_TIC files and input files must be identical
    /*if (out_TIC_debug.size() > 0 && in.size() != out_TIC_debug.size())
    {
        LOG_FATAL_ERROR << "Error: number of input file 'in' and auto_rt:out_debug_TIC files must be identical!" << std::endl;
        return ILLEGAL_PARAMETERS;
    }*/

    // number of header files and input files must be identical
    if (in_header.size() > 0 && in.size() != in_header.size())
    {
      LOG_FATAL_ERROR << "Error: number of input file 'in' and 'in_header' files must be identical!" << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    if (!getFlag_("auto_rt:enabled") && !out_TIC_debug.empty())
    {
      LOG_FATAL_ERROR << "Error: TIC output file requested, but auto_rt is not enabled! Either do not request the file or switch on 'auto_rt:enabled'." << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    double rttol = getDoubleOption_("rt_tol");
    double mztol = getDoubleOption_("mz_tol");
    Size rt_collect = getIntOption_("rt_collect");

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    MzMLFile mzml_file;
    mzml_file.setLogType(log_type_);
    MSExperiment<Peak1D> exp, exp_pp;

    EDTAFile ed;
    ConsensusMap cm;
    ed.load(edta, cm);

    StringList tf_single_header0, tf_single_header1, tf_single_header2; // header content, for each column

    std::vector<String> vec_single; // one line for each compound, multiple columns per experiment
    vec_single.resize(cm.size());
    for (Size fi = 0; fi < in.size(); ++fi)
    {
      // load raw data
      mzml_file.load(in[fi], exp);
      exp.sortSpectra(true);

      if (exp.empty())
      {
        LOG_WARN << "The given file does not contain any conventional peak data, but might"
                    " contain chromatograms. This tool currently cannot handle them, sorry." << std::endl;
        return INCOMPATIBLE_INPUT_DATA;
      }

      // try to detect RT peaks (only for the first input file -- all others should align!)
      // cm.size() might change in here...
      if (getFlag_("auto_rt:enabled") && fi == 0)
      {
        ConsensusMap cm_local = cm; // we might have different RT peaks for each map if 'auto_rt' is enabled
        cm.clear(false); // reset global list (about to be filled)

        // compute TIC
        MSChromatogram<> tic = exp.getTIC();
        MSSpectrum<> tics, tic_gf, tics_pp, tics_sn;
        for (Size ic = 0; ic < tic.size(); ++ic)
        { // rewrite Chromatogram to MSSpectrum (GaussFilter requires it)
          Peak1D peak;
          peak.setMZ(tic[ic].getRT());
          peak.setIntensity(tic[ic].getIntensity());
          tics.push_back(peak);
        }
        // smooth (no PP_CWT here due to efficiency reasons -- large FWHM take longer!)
        double fwhm = getDoubleOption_("auto_rt:FHWM");
        GaussFilter gf;
        Param p = gf.getParameters();
        p.setValue("gaussian_width", fwhm * 2); // wider than FWHM, just to be sure we have a fully smoothed peak. Merging two peaks is unlikely
        p.setValue("use_ppm_tolerance", "false");
        gf.setParameters(p);
        tic_gf = tics;
        gf.filter(tic_gf);
        // pick peaks
        PeakPickerHiRes pp;
        p = pp.getParameters();
        p.setValue("signal_to_noise", getDoubleOption_("auto_rt:SNThreshold"));
        pp.setParameters(p);
        pp.pick(tic_gf, tics_pp);

        if (tics_pp.size())
        {
          LOG_INFO << "Found " << tics_pp.size() << " auto-rt peaks at: ";
          for (Size ipp = 0; ipp != tics_pp.size(); ++ipp) LOG_INFO << " " << tics_pp[ipp].getMZ();
        }
        else
        {
          LOG_INFO << "Found no auto-rt peaks. Change threshold parameters!";
        }
        LOG_INFO << std::endl;

        if (!out_TIC_debug.empty()) // if debug file was given
        { // store intermediate steps for debug
          MSExperiment<> out_debug;
          out_debug.addChromatogram(toChromatogram(tics));
          out_debug.addChromatogram(toChromatogram(tic_gf));

          SignalToNoiseEstimatorMedian<MSSpectrum<> > snt;
          snt.init(tics);
          for (Size is = 0; is < tics.size(); ++is)
          {
            Peak1D peak;
            peak.setMZ(tic[is].getMZ());
            peak.setIntensity(snt.getSignalToNoise(tics[is]));
            tics_sn.push_back(peak);
          }
          out_debug.addChromatogram(toChromatogram(tics_sn));

          out_debug.addChromatogram(toChromatogram(tics_pp));
          // get rid of "native-id" missing warning
          for (Size id = 0; id < out_debug.size(); ++id) out_debug[id].setNativeID(String("spectrum=") + id);

          mzml_file.store(out_TIC_debug, out_debug);
          LOG_DEBUG << "Storing debug AUTO-RT: " << out_TIC_debug << std::endl;
        }

        // add target EICs: for each m/z with no/negative RT, add all combinations of that m/z with auto-RTs
        // duplicate m/z entries will be ignored!
        // all other lines with positive RT values are copied unaffected
        //do not allow doubles
        std::set<double> mz_doubles;
        for (ConsensusMap::Iterator cit = cm_local.begin(); cit != cm_local.end(); ++cit)
        {
          if (cit->getRT() < 0)
          {
            if (mz_doubles.find(cit->getMZ()) == mz_doubles.end())
            {
              mz_doubles.insert(cit->getMZ());
            }
            else
            {
              LOG_INFO << "Found duplicate m/z entry (" << cit->getMZ() << ") for auto-rt. Skipping ..." << std::endl;
              continue;
            }

            ConsensusMap cm_RT_multiplex;
            for (MSSpectrum<>::ConstIterator itp = tics_pp.begin(); itp != tics_pp.end(); ++itp)
            {
              ConsensusFeature f = *cit;
              f.setRT(itp->getMZ());
              cm.push_back(f);
            }

          }
          else
          { // default feature with no auto-rt
            LOG_INFO << "copying feature with RT " << cit->getRT() << std::endl;
            cm.push_back(*cit);
          }
        }

        // resize, since we have more positions now
        vec_single.resize(cm.size());
      }


      // search for each EIC and add up
      Int not_found(0);
      Map<Size, double> quant;

      String description;
      if (fi < in_header.size())
      {
        HeaderInfo info(in_header[fi]);
        description = info.header_description;
      }

      if (fi == 0)
      { // two additional columns for first file (theoretical RT and m/z)
        tf_single_header0 << "" << "";
        tf_single_header1 << "" << "";
        tf_single_header2 << "RT" << "mz";
      }

      // 5 entries for each input file
      tf_single_header0 << File::basename(in[fi]) << "" << "" << "" << "";
      tf_single_header1 << description << "" << "" << "" << "";
      tf_single_header2 << "RTobs" << "dRT" << "mzobs" << "dppm" << "intensity";

      for (Size i = 0; i < cm.size(); ++i)
      {
        //std::cerr << "Rt" << cm[i].getRT() << "  mz: " << cm[i].getMZ() << " R " <<  cm[i].getMetaValue("rank") << "\n";

        double mz_da = mztol * cm[i].getMZ() / 1e6; // mz tolerance in Dalton
        MSExperiment<>::ConstAreaIterator it = exp.areaBeginConst(cm[i].getRT() - rttol / 2,
                                                                  cm[i].getRT() + rttol / 2,
                                                                  cm[i].getMZ() - mz_da,
                                                                  cm[i].getMZ() + mz_da);
        Peak2D max_peak;
        max_peak.setIntensity(0);
        max_peak.setRT(cm[i].getRT());
        max_peak.setMZ(cm[i].getMZ());
        for (; it != exp.areaEndConst(); ++it)
        {
          if (max_peak.getIntensity() < it->getIntensity())
          {
            max_peak.setIntensity(it->getIntensity());
            max_peak.setRT(it.getRT());
            max_peak.setMZ(it->getMZ());
          }
        }
        double ppm = 0; // observed m/z offset

        if (max_peak.getIntensity() == 0)
        {
          ++not_found;
        }
        else
        {
          // take median for m/z found
          std::vector<double> mz;
          MSExperiment<>::Iterator itm = exp.RTBegin(max_peak.getRT());
          SignedSize low = std::min<SignedSize>(std::distance(exp.begin(), itm), rt_collect);
          SignedSize high = std::min<SignedSize>(std::distance(itm, exp.end()) - 1, rt_collect);
          MSExperiment<>::AreaIterator itt = exp.areaBegin((itm - low)->getRT() - 0.01, (itm + high)->getRT() + 0.01, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da);
          for (; itt != exp.areaEnd(); ++itt)
          {
            mz.push_back(itt->getMZ());
            //std::cerr << "ppm: " << itt.getRT() << " " <<  itt->getMZ() << " " << itt->getIntensity() << std::endl;
          }

          if ((SignedSize)mz.size() > (low + high + 1)) LOG_WARN << "Compound " << i << " has overlapping peaks [" << mz.size() << "/" << low + high + 1 << "]" << std::endl;

          if (!mz.empty())
          {
            double avg_mz = std::accumulate(mz.begin(), mz.end(), 0.0) / double(mz.size());
            //std::cerr << "avg: " << avg_mz << "\n";
            ppm = (avg_mz - cm[i].getMZ()) / cm[i].getMZ() * 1e6;
          }

        }

        // appending the second column set requires separator
        String append_sep = (fi == 0 ? "" : out_sep);

        vec_single[i] += append_sep; // new line
        if (fi == 0)
        {
          vec_single[i] += String(cm[i].getRT()) + out_sep +
                           String(cm[i].getMZ()) + out_sep;
        }
        vec_single[i] += String(max_peak.getRT()) + out_sep +
                         String(max_peak.getRT() - cm[i].getRT()) + out_sep +
                         String(max_peak.getMZ()) + out_sep +
                         String(ppm)  + out_sep +
                         String(max_peak.getIntensity());
      }

      if (not_found) LOG_INFO << "Missing peaks for " << not_found << " compounds in file '" << in[fi] << "'.\n";
    }

    //-------------------------------------------------------------
    // create header
    //-------------------------------------------------------------
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header2, out_sep));
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header1, out_sep));
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header0, out_sep));

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    TextFile tf;
    for (std::vector<String>::iterator v_it = vec_single.begin(); v_it != vec_single.end(); ++v_it)
    {
      tf.addLine(*v_it);
    }
    tf.store(out);

    return EXECUTION_OK;
  }
Exemplo n.º 8
0
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec)
{
    //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0)
    //{
    //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl;
    //return;
    //}

    PeakSpectrum new_CID_spec(CID_spec);
    windowMower_(new_CID_spec, 0.3, 1);

    Param zhang_param;
    zhang_param = zhang_.getParameters();
    zhang_param.setValue("tolerance", fragment_mass_tolerance_);
    zhang_param.setValue("use_gaussian_factor", "true");
    zhang_param.setValue("use_linear_factor", "false");
    zhang_.setParameters(zhang_param);


    Normalizer normalizer;
    Param n_param(normalizer.getParameters());
    n_param.setValue("method", "to_one");
    normalizer.setParameters(n_param);
    normalizer.filterSpectrum(new_CID_spec);

    Size charge(2);
    double precursor_weight(0);     // [M+H]+
    if (!CID_spec.getPrecursors().empty())
    {
        // believe charge of spectrum?
        if (CID_spec.getPrecursors().begin()->getCharge() != 0)
        {
            charge = CID_spec.getPrecursors().begin()->getCharge();
        }
        else
        {
            // TODO estimate charge state
        }
        precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U);
    }

    //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl;

    // now delete all peaks that are right of the estimated precursor weight
    Size peak_counter(0);
    for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter)
    {
        if (it->getPosition()[0] > precursor_weight)
        {
            break;
        }
    }
    if (peak_counter < new_CID_spec.size())
    {
        new_CID_spec.resize(peak_counter);
    }


    static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight();

    Peak1D p;
    p.setIntensity(1);
    p.setPosition(oxonium_mass);

    new_CID_spec.push_back(p);

    p.setPosition(precursor_weight);
    new_CID_spec.push_back(p);

    // add complement to spectrum
    /*
    for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
    {
    // get m/z of complement
    double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U;

    // search if peaks are available that have similar m/z values
    Size count(0);
    bool found(false);
    for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count)
    {
    if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance)
    {
      // add peak intensity to corresponding peak in new_CID_spec
      new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity());
    }
    }
    if (!found)
    {
    // infer this peak
    Peak1D p;
    p.setIntensity(it1->getIntensity());
    p.setPosition(mz_comp);
    new_CID_spec.push_back(p);
    }
    }*/

    CompNovoIonScoringCID ion_scoring;
    Param ion_scoring_param(ion_scoring.getParameters());
    ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_);
    ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_);
    ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_);
    ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold"));
    ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score"));
    ion_scoring_param.setValue("max_isotope", max_isotope_);
    ion_scoring.setParameters(ion_scoring_param);

    Map<double, IonScore> ion_scores;
    ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge);

    new_CID_spec.sortByPosition();

    /*
    cerr << "Size of ion_scores " << ion_scores.size() << endl;
    for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        cerr << it->first << " " << it->second.score << endl;
    }*/

#ifdef WRITE_SCORED_SPEC
    PeakSpectrum filtered_spec(new_CID_spec);
    filtered_spec.clear();
    for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        Peak1D p;
        p.setIntensity(it->second.score);
        p.setPosition(it->first);
        filtered_spec.push_back(p);
    }
    DTAFile().store("spec_scored.dta", filtered_spec);
#endif

    set<String> sequences;
    getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores);

#ifdef SPIKE_IN
    sequences.insert("AFCVDGEGR");
    sequences.insert("APEFAAPWPDFVPR");
    sequences.insert("AVKQFEESQGR");
    sequences.insert("CCTESLVNR");
    sequences.insert("DAFLGSFLYEYSR");
    sequences.insert("DAIPENLPPLTADFAEDK");
    sequences.insert("DDNKVEDIWSFLSK");
    sequences.insert("DDPHACYSTVFDK");
    sequences.insert("DEYELLCLDGSR");
    sequences.insert("DGAESYKELSVLLPNR");
    sequences.insert("DGASCWCVDADGR");
    sequences.insert("DLFIPTCLETGEFAR");
    sequences.insert("DTHKSEIAHR");
    sequences.insert("DVCKNYQEAK");
    sequences.insert("EACFAVEGPK");
    sequences.insert("ECCHGDLLECADDR");
    sequences.insert("EFLGDKFYTVISSLK");
    sequences.insert("EFTPVLQADFQK");
    sequences.insert("ELFLDSGIFQPMLQGR");
    sequences.insert("ETYGDMADCCEK");
    sequences.insert("EVGCPSSSVQEMVSCLR");
    sequences.insert("EYEATLEECCAK");
    sequences.insert("FADLIQSGTFQLHLDSK");
    sequences.insert("FFSASCVPGATIEQK");
    sequences.insert("FLANVSTVLTSK");
    sequences.insert("FLSGSDYAIR");
    sequences.insert("FTASCPPSIK");
    sequences.insert("GAIEWEGIESGSVEQAVAK");
    sequences.insert("GDVAFIQHSTVEENTGGK");
    sequences.insert("GEPPSCAEDQSCPSER");
    sequences.insert("GEYVPTSLTAR");
    sequences.insert("GQEFTITGQKR");
    sequences.insert("GTFAALSELHCDK");
    sequences.insert("HLVDEPQNLIK");
    sequences.insert("HQDCLVTTLQTQPGAVR");
    sequences.insert("HTTVNENAPDQK");
    sequences.insert("ILDCGSPDTEVR");
    sequences.insert("KCPSPCQLQAER");
    sequences.insert("KGTEFTVNDLQGK");
    sequences.insert("KQTALVELLK");
    sequences.insert("KVPQVSTPTLVEVSR");
    sequences.insert("LALQFTTNAKR");
    sequences.insert("LCVLHEKTPVSEK");
    sequences.insert("LFTFHADICTLPDTEK");
    sequences.insert("LGEYGFQNALIVR");
    sequences.insert("LHVDPENFK");
    sequences.insert("LKECCDKPLLEK");
    sequences.insert("LKHLVDEPQNLIK");
    sequences.insert("LKPDPNTLCDEFK");
    sequences.insert("LLGNVLVVVLAR");
    sequences.insert("LLVVYPWTQR");
    sequences.insert("LRVDPVNFK");
    sequences.insert("LTDEELAFPPLSPSR");
    sequences.insert("LVNELTEFAK");
    sequences.insert("MFLSFPTTK");
    sequences.insert("MPCTEDYLSLILNR");
    sequences.insert("NAPYSGYSGAFHCLK");
    sequences.insert("NECFLSHKDDSPDLPK");
    sequences.insert("NEPNKVPACPGSCEEVK");
    sequences.insert("NLQMDDFELLCTDGR");
    sequences.insert("QAGVQAEPSPK");
    sequences.insert("RAPEFAAPWPDFVPR");
    sequences.insert("RHPEYAVSVLLR");
    sequences.insert("RPCFSALTPDETYVPK");
    sequences.insert("RSLLLAPEEGPVSQR");
    sequences.insert("SAFPPEPLLCSVQR");
    sequences.insert("SAGWNIPIGTLLHR");
    sequences.insert("SCWCVDEAGQK");
    sequences.insert("SGNPNYPHEFSR");
    sequences.insert("SHCIAEVEK");
    sequences.insert("SISSGFFECER");
    sequences.insert("SKYLASASTMDHAR");
    sequences.insert("SLHTLFGDELCK");
    sequences.insert("SLLLAPEEGPVSQR");
    sequences.insert("SPPQCSPDGAFRPVQCK");
    sequences.insert("SREGDPLAVYLK");
    sequences.insert("SRQIPQCPTSCER");
    sequences.insert("TAGTPVSIPVCDDSSVK");
    sequences.insert("TCVADESHAGCEK");
    sequences.insert("TQFGCLEGFGR");
    sequences.insert("TVMENFVAFVDK");
    sequences.insert("TYFPHFDLSHGSAQVK");
    sequences.insert("TYMLAFDVNDEK");
    sequences.insert("VDEVGGEALGR");
    sequences.insert("VDLLIGSSQDDGLINR");
    sequences.insert("VEDIWSFLSK");
    sequences.insert("VGGHAAEYGAEALER");
    sequences.insert("VGTRCCTKPESER");
    sequences.insert("VKVDEVGGEALGR");
    sequences.insert("VKVDLLIGSSQDDGLINR");
    sequences.insert("VLDSFSNGMK");
    sequences.insert("VLSAADKGNVK");
    sequences.insert("VPQVSTPTLVEVSR");
    sequences.insert("VTKCCTESLVNR");
    sequences.insert("VVAASDASQDALGCVK");
    sequences.insert("VVAGVANALAHR");
    sequences.insert("YICDNQDTISSK");
    sequences.insert("YLASASTMDHAR");
    sequences.insert("YNGVFQECCQAEDK");
#endif

    SpectrumAlignmentScore spectra_zhang;
    spectra_zhang.setParameters(zhang_param);

    vector<PeptideHit> hits;
    Size missed_cleavages = param_.getValue("missed_cleavages");
    for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it)
    {

        Size num_missed = countMissedCleavagesTryptic_(*it);
        if (missed_cleavages < num_missed)
        {
            //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl;
            continue;
        }
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, *it, charge);

        //normalizer.filterSpectrum(CID_sim_spec);

        double cid_score = zhang_(CID_sim_spec, CID_spec);

        PeptideHit hit;
        hit.setScore(cid_score);

        hit.setSequence(getModifiedAASequence_(*it));
        hit.setCharge((Int)charge);   //TODO unify charge interface: int or size?
        hits.push_back(hit);
        //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl;
    }

    // rescore the top hits
    id.setHits(hits);
    id.assignRanks();

    hits = id.getHits();

    SpectrumAlignmentScore alignment_score;
    Param align_param(alignment_score.getParameters());
    align_param.setValue("tolerance", fragment_mass_tolerance_);
    align_param.setValue("use_linear_factor", "true");
    alignment_score.setParameters(align_param);

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits");
    if (hits.size() > number_of_prescoring_hits)
    {
        hits.resize(number_of_prescoring_hits);
    }

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge);

        normalizer.filterSpectrum(CID_sim_spec);

        //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec);

        //double cid_score = spectra_zhang(CID_sim_spec, CID_spec);
        double cid_score = alignment_score(CID_sim_spec, CID_spec);

        //cerr << "Final: " << it->getSequence() << " " << cid_score << endl;

        it->setScore(cid_score);
    }

    id.setHits(hits);
    id.assignRanks();
    hits = id.getHits();

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_hits = param_.getValue("number_of_hits");
    if (id.getHits().size() > number_of_hits)
    {
        hits.resize(number_of_hits);
    }

    id.setHits(hits);
    id.assignRanks();

    return;
}
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    StringList in_spec = getStringList_("in");
    StringList out = getStringList_("out");
    String in_lib = getStringOption_("lib");
    String compare_function = getStringOption_("compare_function");
    Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer");
    float precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance");
    //Int min_precursor_charge = getIntOption_("min_precursor_charge");
    //Int max_precursor_charge = getIntOption_("max_precursor_charge");
    float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold");
    UInt min_peaks = getIntOption_("filter:min_peaks");
    UInt max_peaks = getIntOption_("filter:max_peaks");
    Int cut_peaks_below = getIntOption_("filter:cut_peaks_below");
    StringList fixed_modifications = getStringList_("fixed_modifications");
    StringList variable_modifications = getStringList_("variable_modifications");
    Int top_hits  = getIntOption_("top_hits");
    if (top_hits < -1)
    {
      writeLog_("top_hits (should be  >= -1 )");
      return ILLEGAL_PARAMETERS;
    }
    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    if (out.size() != in_spec.size())
    {
      writeLog_("out (should be as many as input files)");
      return ILLEGAL_PARAMETERS;
    }

    time_t prog_time = time(NULL);
    MSPFile spectral_library;
    RichPeakMap query, library;
    //spectrum which will be identified
    MzMLFile spectra;
    spectra.setLogType(log_type_);

    time_t start_build_time = time(NULL);
    //-------------------------------------------------------------
    //building map for faster search
    //-------------------------------------------------------------

    //library containing already identified peptide spectra
    vector<PeptideIdentification> ids;
    spectral_library.load(in_lib, ids, library);

    map<Size, vector<PeakSpectrum> > MSLibrary;
    {
      RichPeakMap::iterator s;
      vector<PeptideIdentification>::iterator i;
      ModificationsDB* mdb = ModificationsDB::getInstance();
      for (s = library.begin(), i = ids.begin(); s < library.end(); ++s, ++i)
      {
        double precursor_MZ = (*s).getPrecursors()[0].getMZ();
        Size MZ_multi = (Size)precursor_MZ * precursor_mass_multiplier;
        map<Size, vector<PeakSpectrum> >::iterator found;
        found = MSLibrary.find(MZ_multi);

        PeakSpectrum librar;
        bool variable_modifications_ok = true;
        bool fixed_modifications_ok = true;
        const AASequence& aaseq = i->getHits()[0].getSequence();
        //variable fixed modifications
        if (!fixed_modifications.empty())
        {
          for (Size i = 0; i < aaseq.size(); ++i)
          {
            const   Residue& mod  = aaseq.getResidue(i);
            for (Size s = 0; s < fixed_modifications.size(); ++s)
            {
              if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification())
              {
                fixed_modifications_ok = false;
                break;
              }
            }
          }
        }
        //variable modifications
        if (aaseq.isModified() && (!variable_modifications.empty()))
        {
          for (Size i = 0; i < aaseq.size(); ++i)
          {
            if (aaseq.isModified(i))
            {
              const   Residue& mod  = aaseq.getResidue(i);
              for (Size s = 0; s < variable_modifications.size(); ++s)
              {
                if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification())
                {
                  variable_modifications_ok = false;
                  break;
                }
              }
            }
          }
        }
        if (variable_modifications_ok && fixed_modifications_ok)
        {
          PeptideIdentification& translocate_pid = *i;
          librar.getPeptideIdentifications().push_back(translocate_pid);
          librar.setPrecursors(s->getPrecursors());
          //library entry transformation
          for (UInt l = 0; l < s->size(); ++l)
          {
            Peak1D peak;
            if ((*s)[l].getIntensity() >  remove_peaks_below_threshold)
            {
              const String& info = (*s)[l].getMetaValue("MSPPeakInfo");
              if (info[0] == '?')
              {
                peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity()));
              }
              else
              {
                peak.setIntensity(sqrt((*s)[l].getIntensity()));
              }

              peak.setMZ((*s)[l].getMZ());
              peak.setPosition((*s)[l].getPosition());
              librar.push_back(peak);
            }
          }
          if (found != MSLibrary.end())
          {
            found->second.push_back(librar);
          }
          else
          {
            vector<PeakSpectrum> tmp;
            tmp.push_back(librar);
            MSLibrary.insert(make_pair(MZ_multi, tmp));
          }
        }
      }
    }
    time_t end_build_time = time(NULL);
    cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n";
    //compare function
    PeakSpectrumCompareFunctor* comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function);
    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    double score;
    StringList::iterator in, out_file;
    for (in  = in_spec.begin(), out_file  = out.begin(); in < in_spec.end(); ++in, ++out_file)
    {
      time_t start_time = time(NULL);
      spectra.load(*in, query);
      //Will hold valuable hits
      vector<PeptideIdentification> peptide_ids;
      vector<ProteinIdentification> protein_ids;
      // Write parameters to ProteinIdentifcation
      ProteinIdentification prot_id;
      //Parameters of identificaion
      prot_id.setIdentifier("test");
      prot_id.setSearchEngineVersion("SpecLibSearcher");
      prot_id.setDateTime(DateTime::now());
      prot_id.setScoreType(compare_function);
      ProteinIdentification::SearchParameters searchparam;
      searchparam.precursor_tolerance = precursor_mass_tolerance;
      prot_id.setSearchParameters(searchparam);
      /***********SEARCH**********/
      for (UInt j = 0; j < query.size(); ++j)
      {
        //Set identifier for each identifications
        PeptideIdentification pid;
        pid.setIdentifier("test");
        pid.setScoreType(compare_function);
        ProteinHit pr_hit;
        pr_hit.setAccession(j);
        prot_id.insertHit(pr_hit);
        //RichPeak1D to Peak1D transformation for the compare function query
        PeakSpectrum quer;
        bool peak_ok = true;
        query[j].sortByIntensity(true);
        double min_high_intensity = 0;

        if (query[j].empty() || query[j].getMSLevel() != 2)
        {
          continue;
        }
        if (query[j].getPrecursors().empty())
        {
          writeLog_("Warning MS2 spectrum without precursor information");
          continue;
        }

        min_high_intensity = (1 / cut_peaks_below) * query[j][0].getIntensity();

        query[j].sortByPosition();
        for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k)
        {
          if (query[j][k].getIntensity() >  remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity)
          {
            Peak1D peak;
            peak.setIntensity(sqrt(query[j][k].getIntensity()));
            peak.setMZ(query[j][k].getMZ());
            peak.setPosition(query[j][k].getPosition());
            quer.push_back(peak);
          }
        }
        if (quer.size() >= min_peaks)
        {
          peak_ok = true;
        }
        else
        {
          peak_ok = false;
        }
        double query_MZ = query[j].getPrecursors()[0].getMZ();
        if (peak_ok)
        {
          bool charge_one = false;
          Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0);
          Int margin  = (Int) Math::round((query[j].size() / 100.0) * 1.0);
          for (vector<RichPeak1D>::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent)
          {
            if (peak->getMZ() < query_MZ)
            {
              break;
            }
          }
          if (percent > margin)
          {
            charge_one = true;
          }
          float min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier;
          float max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier;
          for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz)
          {
            map<Size, vector<PeakSpectrum> >::iterator found;
            found = MSLibrary.find(mz);
            if (found != MSLibrary.end())
            {
              vector<PeakSpectrum>& library = found->second;
              for (Size i = 0; i < library.size(); ++i)
              {
                float this_MZ  = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier;
                if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false))
                {
                  PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0];
                  PeakSpectrum& librar = library[i];
                  //Special treatment for SpectraST score as it computes a score based on the whole library
                  if (compare_function == "SpectraSTSimilarityScore")
                  {
                    SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor);
                    BinnedSpectrum quer_bin = sp->transform(quer);
                    BinnedSpectrum librar_bin = sp->transform(librar);
                    score = (*sp)(quer, librar); //(*sp)(quer_bin,librar_bin);
                    double dot_bias = sp->dot_bias(quer_bin, librar_bin, score);
                    hit.setMetaValue("DOTBIAS", dot_bias);
                  }
                  else
                  {
                    score = (*comparor)(quer, librar);
                  }

                  DataValue RT(library[i].getRT());
                  DataValue MZ(library[i].getPrecursors()[0].getMZ());
                  hit.setMetaValue("RT", RT);
                  hit.setMetaValue("MZ", MZ);
                  hit.setScore(score);
                  PeptideEvidence pe;
                  pe.setProteinAccession(pr_hit.getAccession());
                  hit.addPeptideEvidence(pe);
                  pid.insertHit(hit);
                }
              }
            }
          }
        }
        pid.setHigherScoreBetter(true);
        pid.sort();
        if (compare_function == "SpectraSTSimilarityScore")
        {
          if (!pid.empty() && !pid.getHits().empty())
          {
            vector<PeptideHit> final_hits;
            final_hits.resize(pid.getHits().size());
            SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor);
            Size runner_up = 1;
            for (; runner_up < pid.getHits().size(); ++runner_up)
            {
              if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5)
              {
                break;
              }
            }
            double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore());
            for (Size s = 0; s < pid.getHits().size(); ++s)
            {
              final_hits[s] = pid.getHits()[s];
              final_hits[s].setMetaValue("delta D", delta_D);
              final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore());
              final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS")));

              //final_hits[s].removeMetaValue("DOTBIAS");
            }
            pid.setHits(final_hits);
            pid.sort();
            pid.setMZ(query[j].getPrecursors()[0].getMZ());
            pid.setRT(query_MZ);
          }
        }
        if (top_hits != -1 && (UInt)top_hits < pid.getHits().size())
        {
          vector<PeptideHit> hits;
          hits.resize(top_hits);
          for (Size i = 0; i < (UInt)top_hits; ++i)
          {
            hits[i] = pid.getHits()[i];
          }
          pid.setHits(hits);
        }
        peptide_ids.push_back(pid);
      }
      protein_ids.push_back(prot_id);
      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------
      IdXMLFile id_xml_file;
      id_xml_file.store(*out_file, protein_ids, peptide_ids);
      time_t end_time = time(NULL);
      cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n";
    }
    time_t end_time = time(NULL);
    cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n";
    return EXECUTION_OK;
  }
Exemplo n.º 10
0
	bool ShiftModel1D::finish()
	{

		if (!isValid())
		{
			return false;
		}
	
		if (!system_)
		{
			Log.info() << "No valid system found!" << std::endl;
			return false;
		}
		
		// compute the shift model if necessary
		if (compute_shifts_)
		{
			BALL::ShiftModel sm(parameters_.getFilename());
			system_->apply(sm);
		}

		String element = "";
		
		// Peter Bayer proposed as peak width  
		// for H      15Hz
		// for N			10hz
		// for C			5Hz

		// peakwidth is meassured in ppm, since
		// experiments were done in Hz, we convert the values 
		// according to the formular
		// 
		// 		offset [Hz] = offset[ppm] * basic frequency
		//
		// for our prediction we assume a basic frequency of 700 MHz
		float peakwidth = 0.0;
		
		switch(type_)
		{
			case H:
			case H_ON_BACKBONE:
				element = "H";
				//peakwidth = 0.02142; // Peter Bayers estimation
				peakwidth = 0.0032; // this is the former BALL estimation
				break;
			case N:
			case N_BACKBONE:
				element = "N";
				peakwidth = 0.01428;
				break;
			case C:
			case C_BACKBONE:  
				element = "C";
				peakwidth = 0.00714;
				break;
		}
		int counter = 0;

		if (element == "" )
			return true;
		
		for (BALL::ResidueIterator r_it = system_->beginResidue(); +r_it; ++r_it)
		{	
			Atom* atom = NULL;

			for (BALL::AtomIterator at_it = r_it->beginAtom(); +at_it; ++at_it)
			{
				if (hasType_(&(*at_it), type_))
				{
					counter++;
					atom = &(*at_it);
					// we have, get the shift
					float shift = atom->getProperty(BALL::ShiftModule::PROPERTY__SHIFT).getFloat();
					Peak1D peak;
					
					float pos = shift; 
					peak.setPosition(pos);
				
					peak.setWidth(peakwidth);
					peak.setIntensity(peak.getIntensity()+1);
					//setAtom();
					peaks_.push_back(peak);
				}
			}
		}

		std::cout << "Number of considered atoms: "<< counter << std::endl;
		return true;
	}
  void CompNovoIdentificationBase::getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, DoubleReal prefix, DoubleReal suffix)
  {
    static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
    static DoubleReal nh3_mass = EmpiricalFormula("NH3").getMonoWeight();
    static DoubleReal co_mass = EmpiricalFormula("CO").getMonoWeight();
    Peak1D p;
    DoubleReal b_pos(0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);
    bool b_H2O_loss(false), b_NH3_loss(false), y_NH3_loss(false);

    for (Size i = 0; i != sequence.size() - 1; ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[sequence.size() - i - 1]);
      y_pos += aa_to_weight_[aa2];
      for (Size z = 1; z <= charge && z < 3; ++z)
      {
        // b-ions
        if (b_pos >= min_mz_ && b_pos <= max_mz_)
        {
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /*|| b_pos > MIN_DOUBLE_MZ*/)
            {
              p.setPosition((b_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j + Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)b_pos][j] * 0.8 / (z * z));
              spec.push_back(p);
            }
          }
        }

        // b-ion losses
        if (b_pos - h2o_mass > min_mz_ && b_pos - h2o_mass < max_mz_)
        {
          if (b_H2O_loss || aa == 'S' || aa == 'T' || aa == 'E' || aa == 'D')
          {
            b_H2O_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - h2o_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));
            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
          if (b_NH3_loss || aa == 'Q' || aa == 'N' || aa == 'R' || aa == 'K')
          {
            b_NH3_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - nh3_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));

            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }

        // a-ions only for charge 1
        if (z == 1)
        {
          if (b_pos - co_mass > min_mz_ && b_pos - co_mass < max_mz_)
          {
            // a-ions
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - co_mass) / (DoubleReal)z);
            p.setIntensity(0.1f);
            spec.push_back(p);
          }
        }



        if (y_pos > min_mz_ && y_pos < max_mz_)
        {
          // y-ions
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
            {
              p.setPosition((y_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)y_pos][j] / (DoubleReal) (z * z));
              spec.push_back(p);
            }
          }

          // H2O loss
          p.setPosition((y_pos + z * Constants::PROTON_MASS_U - h2o_mass) / (DoubleReal)z);
          p.setIntensity(0.1 / (DoubleReal)(z * z));
          if (aa2 == 'Q')           // pyroglutamic acid formation
          {
            p.setIntensity(0.5f);
          }
          if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
          {
            spec.push_back(p);
          }

          // NH3 loss
          if (y_NH3_loss || aa2 == 'Q' || aa2 == 'N' || aa2 == 'R' || aa2 == 'K')
          {
            y_NH3_loss = true;
            p.setPosition((y_pos + z * Constants::PROTON_MASS_U - nh3_mass) / (DoubleReal)z);
            p.setIntensity(0.1 / (DoubleReal)(z * z));

            if (z == 1 /*|| y_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }
      }
    }

    // if Q1 abundant loss of water -> pyroglutamic acid formation

    if (sequence[0] == 'Q' && prefix == 0 && suffix == 0)
    {
      /*
      for (PeakSpectrum::Iterator it = spec.begin(); it != spec.end(); ++it)
      {
          it->setIntensity(it->getIntensity() * 0.5);
      }*/

      /*
      for (Size j = 0; j != max_isotope; ++j)
      {
  p.setPosition((precursor_weight + charge - 1 + j)/(DoubleReal)charge);
  p.setIntensity(isotope_distributions_[(Int)p.getPosition()[0]][j] * 0.1);
  spec.push_back(p);
      }
      */
    }


    spec.sortByPosition();

    return;
  }
Exemplo n.º 12
0
  void PeakPickerHiRes::pick(const MSSpectrum& input, MSSpectrum& output, std::vector<PeakBoundary>& boundaries, bool check_spacings) const
  {
    // copy meta data of the input spectrum
    output.clear(true);
    output.SpectrumSettings::operator=(input);
    output.MetaInfoInterface::operator=(input);
    output.setRT(input.getRT());
    output.setMSLevel(input.getMSLevel());
    output.setName(input.getName());
    output.setType(SpectrumSettings::CENTROID);
    if (report_FWHM_)
    {
      output.getFloatDataArrays().resize(1);
      output.getFloatDataArrays()[0].setName( report_FWHM_as_ppm_ ? "FWHM_ppm" : "FWHM");
    }

    // don't pick a spectrum with less than 5 data points
    if (input.size() < 5) return;

    // if both spacing constraints are disabled, don't check spacings at all:
    if ((spacing_difference_ == std::numeric_limits<double>::infinity()) &&
      (spacing_difference_gap_ == std::numeric_limits<double>::infinity()))
    {
      check_spacings = false;
    }

    // signal-to-noise estimation
    SignalToNoiseEstimatorMedian<MSSpectrum > snt;
    snt.setParameters(param_.copy("SignalToNoise:", true));

    if (signal_to_noise_ > 0.0)
    {
      snt.init(input);
    }

    // find local maxima in profile data
    for (Size i = 2; i < input.size() - 2; ++i)
    {
      double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
      double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
      double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();

      // do not interpolate when the left or right support is a zero-data-point
      if (std::fabs(left_neighbor_int) < std::numeric_limits<double>::epsilon()) continue;
      if (std::fabs(right_neighbor_int) < std::numeric_limits<double>::epsilon()) continue;

      // MZ spacing sanity checks
      double left_to_central = 0.0, central_to_right = 0.0, min_spacing = 0.0;
      if (check_spacings)
      {
        left_to_central = central_peak_mz - left_neighbor_mz;
        central_to_right = right_neighbor_mz - central_peak_mz;
        min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
      }

      double act_snt = 0.0, act_snt_l1 = 0.0, act_snt_r1 = 0.0;
      if (signal_to_noise_ > 0.0)
      {
        act_snt = snt.getSignalToNoise(input[i]);
        act_snt_l1 = snt.getSignalToNoise(input[i - 1]);
        act_snt_r1 = snt.getSignalToNoise(input[i + 1]);
      }

      // look for peak cores meeting MZ and intensity/SNT criteria
      if ((central_peak_int > left_neighbor_int) && 
        (central_peak_int > right_neighbor_int) && 
        (act_snt >= signal_to_noise_) && 
        (act_snt_l1 >= signal_to_noise_) && 
        (act_snt_r1 >= signal_to_noise_) &&
        (!check_spacings || 
        ((left_to_central < spacing_difference_ * min_spacing) && 
          (central_to_right < spacing_difference_ * min_spacing))))
      {
        // special case: if a peak core is surrounded by more intense
        // satellite peaks (indicates oscillation rather than
        // real peaks) -> remove

        double act_snt_l2 = 0.0, act_snt_r2 = 0.0;

        if (signal_to_noise_ > 0.0)
        {
          act_snt_l2 = snt.getSignalToNoise(input[i - 2]);
          act_snt_r2 = snt.getSignalToNoise(input[i + 2]);
        }

        // checking signal-to-noise?
        if ((i > 1) &&
          (i + 2 < input.size()) &&
          (left_neighbor_int < input[i - 2].getIntensity()) &&
          (right_neighbor_int < input[i + 2].getIntensity()) &&
          (act_snt_l2 >= signal_to_noise_) &&
          (act_snt_r2 >= signal_to_noise_) &&
          (!check_spacings ||
          ((left_neighbor_mz - input[i - 2].getMZ() < spacing_difference_ * min_spacing) && 
            (input[i + 2].getMZ() - right_neighbor_mz < spacing_difference_ * min_spacing))))
        {
          ++i;
          continue;
        }

        std::map<double, double> peak_raw_data;

        peak_raw_data[central_peak_mz] = central_peak_int;
        peak_raw_data[left_neighbor_mz] = left_neighbor_int;
        peak_raw_data[right_neighbor_mz] = right_neighbor_int;

        // peak core found, now extend it
        // to the left
        Size k = 2;

        bool previous_zero_left(false); // no need to extend peak if previous intensity was zero
        Size missing_left(0);
        Size left_boundary(i - 1); // index of the left boundary for the spline interpolation

        while ((k <= i) && // prevent underflow
          (i - k + 1 > 0) && 
          !previous_zero_left && 
          (missing_left <= missing_) && 
          (input[i - k].getIntensity() <= peak_raw_data.begin()->second) &&
          (!check_spacings || 
          (peak_raw_data.begin()->first - input[i - k].getMZ() < spacing_difference_gap_ * min_spacing)))
        {
          double act_snt_lk = 0.0;

          if (signal_to_noise_ > 0.0)
          {
            act_snt_lk = snt.getSignalToNoise(input[i - k]);
          }

          if ((act_snt_lk >= signal_to_noise_) && 
            (!check_spacings ||
            (peak_raw_data.begin()->first - input[i - k].getMZ() < spacing_difference_ * min_spacing)))
          {
            peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
          }
          else
          {
            ++missing_left;
            if (missing_left <= missing_)
            {
              peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
            }
          }

          previous_zero_left = (input[i - k].getIntensity() == 0);
          left_boundary = i - k;
          ++k;
        }

        // to the right
        k = 2;

        bool previous_zero_right(false); // no need to extend peak if previous intensity was zero
        Size missing_right(0);
        Size right_boundary(i+1); // index of the right boundary for the spline interpolation

        while ((i + k < input.size()) && 
          !previous_zero_right && 
          (missing_right <= missing_) && 
          (input[i + k].getIntensity() <= peak_raw_data.rbegin()->second) &&
          (!check_spacings ||
          (input[i + k].getMZ() - peak_raw_data.rbegin()->first < spacing_difference_gap_ * min_spacing)))
        {
          double act_snt_rk = 0.0;

          if (signal_to_noise_ > 0.0)
          {
            act_snt_rk = snt.getSignalToNoise(input[i + k]);
          }

          if ((act_snt_rk >= signal_to_noise_) && 
            (!check_spacings ||
            (input[i + k].getMZ() - peak_raw_data.rbegin()->first < spacing_difference_ * min_spacing)))
          {
            peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
          }
          else
          {
            ++missing_right;
            if (missing_right <= missing_)
            {
              peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
            }
          }

          previous_zero_right = (input[i + k].getIntensity() == 0);
          right_boundary = i + k;
          ++k;
        }

        // skip if the minimal number of 3 points for fitting is not reached
        if (peak_raw_data.size() < 3) continue;

        CubicSpline2d peak_spline (peak_raw_data);

        // calculate maximum by evaluating the spline's 1st derivative
        // (bisection method)
        double max_peak_mz = central_peak_mz;
        double max_peak_int = central_peak_int;
        double threshold = 1e-6;
        OpenMS::Math::spline_bisection(peak_spline, left_neighbor_mz, right_neighbor_mz, max_peak_mz, max_peak_int, threshold);

        //
        // compute FWHM
        //
        if (report_FWHM_)
        {
          double fwhm_int = max_peak_int / 2.0;
          threshold = 0.01 * fwhm_int;
          double mz_mid, int_mid; 
          // left:
          double mz_left = peak_raw_data.begin()->first;
          double mz_center = max_peak_mz;
          if (peak_spline.eval(mz_left) > fwhm_int)
          { // the spline ends before half max is reached -- take the leftmost point (probably an underestimation)
            mz_mid = mz_left;
          }
          else
          {
            do 
            {
              mz_mid = mz_left / 2 + mz_center / 2;
              int_mid = peak_spline.eval(mz_mid);
              if (int_mid < fwhm_int)
              {
                mz_left = mz_mid;
              }
              else
              {
                mz_center = mz_mid;
              }
            } while (fabs(int_mid - fwhm_int) > threshold);
          }
          const double fwhm_left_mz = mz_mid;

          // right ...
          double mz_right = peak_raw_data.rbegin()->first;
          mz_center = max_peak_mz;
          if (peak_spline.eval(mz_right) > fwhm_int)
          { // the spline ends before half max is reached -- take the rightmost point (probably an underestimation)
            mz_mid = mz_right;
          }
          else
          {
            do 
            {
              mz_mid = mz_right / 2 + mz_center / 2;
              int_mid = peak_spline.eval(mz_mid);
              if (int_mid < fwhm_int)
              {
                mz_right = mz_mid;
              }
              else
              {
                mz_center = mz_mid;
              }

            } while (fabs(int_mid - fwhm_int) > threshold);
          }
          const double fwhm_right_mz = mz_mid;
          const double fwhm_absolute = fwhm_right_mz - fwhm_left_mz;
          output.getFloatDataArrays()[0].push_back( report_FWHM_as_ppm_ ? fwhm_absolute / max_peak_mz  * 1e6 : fwhm_absolute);
        } // FWHM

          // save picked peak into output spectrum
        Peak1D peak;
        PeakBoundary peak_boundary;
        peak.setMZ(max_peak_mz);
        peak.setIntensity(max_peak_int);
        peak_boundary.mz_min = input[left_boundary].getMZ();
        peak_boundary.mz_max = input[right_boundary].getMZ();
        output.push_back(peak);

        boundaries.push_back(peak_boundary);

        // jump over profile data points that have been considered already
        i = i + k - 1;
      }
    }

    return;
  }