コード例 #1
0
  void CompNovoIdentificationCID::getIdentifications(vector<PeptideIdentification> & pep_ids, const PeakMap & exp)
  {
    Size count(1);
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it, ++count)
    {
      //cerr << count << "/" << exp.size() << endl;
      PeptideIdentification id;
      // TODO check if both CID and ETD is present;
      PeakSpectrum CID_spec(*it);
      id.setMetaValue("RT", it->getRT());
      id.setMetaValue("MZ", it->getPrecursors().begin()->getMZ());

      subspec_to_sequences_.clear();
      permute_cache_.clear();
      decomp_cache_.clear();

      getIdentification(id, CID_spec);
      //cerr << "size_of id=" << id.getHits().size() << endl;
      pep_ids.push_back(id);

      //++it;

      //
      //if (count == 10)
      //{
      //return;
      //}
    }
    return;
  }
コード例 #2
0
    ExitCodes main_(int, const char **)
    {
        //-------------------------------------------------------------
        // parameter handling
        //-------------------------------------------------------------

        //input/output files
        String in(getStringOption_("in"));
        String out(getStringOption_("out"));

        //-------------------------------------------------------------
        // loading input
        //-------------------------------------------------------------

        RichPeakMap exp;
        MzMLFile f;
        f.setLogType(log_type_);
        f.load(in, exp);

        writeDebug_("Data set contains " + String(exp.size()) + " spectra", 1);

        //-------------------------------------------------------------
        // calculations
        //-------------------------------------------------------------

        writeDebug_("Reading model file", 2);

        // create model an set the given options
        PILISModel * model = new PILISModel();
        model->readFromFile(getStringOption_("model_file"));
        Param model_param(model->getParameters());
        model_param.setValue("upper_mz", getDoubleOption_("model:upper_mz"));
        model_param.setValue("lower_mz", getDoubleOption_("model:lower_mz"));
        model_param.setValue("charge_directed_threshold", getDoubleOption_("model:charge_directed_threshold"));
        model_param.setValue("charge_remote_threshold", getDoubleOption_("model:charge_remote_threshold"));
        //model_param.setValue("min_main_ion_intensity", getDoubleOption_("model:min_main_ion_intensity"));
        //model_param.setValue("min_loss_ion_intensity", getDoubleOption_("model:min_loss_ion_intensity"));
        model_param.setValue("min_y_ion_intensity", getDoubleOption_("model:min_y_ion_intensity"));
        model_param.setValue("min_b_ion_intensity", getDoubleOption_("model:min_b_ion_intensity"));
        model_param.setValue("min_a_ion_intensity", getDoubleOption_("model:min_a_ion_intensity"));
        model_param.setValue("min_y_loss_intensity", getDoubleOption_("model:min_y_loss_intensity"));
        model_param.setValue("min_b_loss_intensity", getDoubleOption_("model:min_b_loss_intensity"));
        model_param.setValue("charge_loss_factor", getDoubleOption_("model:charge_loss_factor"));
        model_param.setValue("visible_model_depth", getIntOption_("model:visible_model_depth"));
        model_param.setValue("model_depth", getIntOption_("model:model_depth"));
        model_param.setValue("fixed_modifications", getStringOption_("fixed_modifications"));
        model->setParameters(model_param);

        writeDebug_("Reading sequence db", 2);

        // create sequence db
        SuffixArrayPeptideFinder * sapf = new SuffixArrayPeptideFinder(getStringOption_("peptide_db_file"), "trypticCompressed");
        sapf->setTolerance(getDoubleOption_("precursor_mass_tolerance"));
        sapf->setNumberOfModifications(0);
        sapf->setUseTags(false);

        //exp.resize(50); // TODO

        UInt max_charge(3), min_charge(1);         // TODO
        vector<double> pre_weights;
        for (RichPeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
        {
            double pre_weight(it->getPrecursors()[0].getMZ());
            for (Size z = min_charge; z <= max_charge; ++z)
            {
                pre_weights.push_back((pre_weight * (double)z) - (double)z);
            }
        }

        sort(pre_weights.begin(), pre_weights.end());

        cerr << "Getting candidates from SA...";
        vector<vector<pair<pair<String, String>, String> > > candidates;
        sapf->getCandidates(candidates, pre_weights);
        cerr << "done" << endl;

        delete sapf;

        map<double, vector<pair<pair<String, String>, String> > > sorted_candidates;
        UInt count(0);
        for (Size count = 0; count != candidates.size(); ++count)
        {
            sorted_candidates[pre_weights[count]] = candidates[count];
        }
        candidates.clear();

        // create ProteinIdentification and set the options
        PILISIdentification PILIS_id;

        PILIS_id.setModel(model);

        Param id_param(PILIS_id.getParameters());
        id_param.setValue("precursor_mass_tolerance", getDoubleOption_("precursor_mass_tolerance"));
        id_param.setValue("max_candidates", getIntOption_("max_pre_candidates"));
        // disable evalue scoring, this is done separately to allow for a single id per spectrum
        id_param.setValue("use_evalue_scoring", 0);
        id_param.setValue("fixed_modifications", getStringOption_("fixed_modifications"));
        PILIS_id.setParameters(id_param);

        vector<PeptideIdentification> ids;

        // perform the ProteinIdentification of the given spectra
        UInt no(0);
        for (RichPeakMap::Iterator it = exp.begin(); it != exp.end(); ++it, ++no)
        {
            if (it->getMSLevel() == 0)
            {
                writeLog_("Warning: MSLevel is 0, assuming MSLevel 2");
                it->setMSLevel(2);
            }

            if (it->getMSLevel() == 2)
            {
                writeDebug_(String(no) + "/" + String(exp.size()), 1);
                PeptideIdentification id;

                map<String, UInt> cand;

                for (UInt z = min_charge; z <= max_charge; ++z)
                {
                    double pre_weight = (it->getPrecursors()[0].getMZ() * (double)z) - (double)z;
                    for (vector<pair<pair<String, String>, String> >::const_iterator cit = sorted_candidates[pre_weight].begin(); cit != sorted_candidates[pre_weight].end(); ++cit)
                    {
                        String seq = cit->first.second;
                        if (seq.size() > 39)
                        {
                            continue;
                        }
                        UInt num_cleavages_sites(0);
                        for (Size k = 0; k != seq.size(); ++k)
                        {
                            if (k != seq.size() - 1)
                            {
                                if ((seq[k] == 'K' || seq[k] == 'R') && seq[k + 1] != 'P')
                                {
                                    ++num_cleavages_sites;
                                }
                            }
                        }

                        if (num_cleavages_sites > 1)
                        {
                            continue;
                        }

                        cand[seq] = z;
                    }
                }

                cerr << "#cand=" << cand.size() << endl;
                PILIS_id.getIdentification(cand, id, *it);

                id.setMetaValue("RT", it->getRT());
                id.setMetaValue("MZ", it->getPrecursors()[0].getMZ());

                ids.push_back(id);

                if (!id.getHits().empty())
                {
                    cerr << it->getPrecursors()[0].getMZ() << " " << AASequence(id.getHits().begin()->getSequence()).getAverageWeight() << endl;
                    writeDebug_(id.getHits().begin()->getSequence().toString() + " (z=" + id.getHits().begin()->getCharge() + "), score=" + String(id.getHits().begin()->getScore()), 10);
                }
            }
        }

        // perform the PILIS scoring to the spectra
        if (!getFlag_("scoring:do_not_use_evalue_scoring"))
        {
            PILISScoring scoring;
            Param scoring_param(scoring.getParameters());
            scoring_param.setValue("use_local_scoring", (int)getFlag_("scoring:use_local_scoring"));
            scoring_param.setValue("survival_function_bin_size", getIntOption_("scoring:survival_function_bin_size"));
            scoring_param.setValue("global_linear_fitting_threshold", getDoubleOption_("scoring:global_linear_fitting_threshold"));
            scoring_param.setValue("local_linear_fitting_threshold", getDoubleOption_("scoring:local_linear_fitting_threshold"));
            scoring.setParameters(scoring_param);

            scoring.getScores(ids);
        }

        // write the result to the IdentificationData structure for the storing
        UInt max_candidates = getIntOption_("max_candidates");
        for (Size i = 0; i != ids.size(); ++i)
        {
            if (ids[i].getHits().size() > max_candidates)
            {
                vector<PeptideHit> hits = ids[i].getHits();
                hits.resize(max_candidates);
                ids[i].setHits(hits);
            }
        }

        delete model;


        //-------------------------------------------------------------
        // writing output
        //-------------------------------------------------------------

        DateTime now;
        now.now();

        String date_string;
        //now.get(date_string); // @todo Fix it (Andreas)
        String identifier("PILIS_" + date_string);

        //UInt count(0);
        count = 0;
        for (RichPeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
        {
            if (it->getMSLevel() == 2)
            {
                ids[count].setMetaValue("RT", it->getRT());
                ids[count].setMetaValue("MZ", it->getPrecursors()[0].getMZ());

                ids[count].setIdentifier(identifier);
                ids[count++].setHigherScoreBetter(false);
            }
        }

        // search parameters
        ProteinIdentification::SearchParameters search_parameters;
        search_parameters.db = getStringOption_("peptide_db_file");
        search_parameters.db_version = "";
        search_parameters.taxonomy = "";
        //search_parameters.charges = getStringOption_("charges");
        search_parameters.mass_type = ProteinIdentification::MONOISOTOPIC;
        vector<String> fixed_mods;
        getStringOption_("fixed_modifications").split(',', fixed_mods);
        search_parameters.fixed_modifications = fixed_mods;
        search_parameters.enzyme = ProteinIdentification::TRYPSIN;
        search_parameters.missed_cleavages = 1;
        search_parameters.peak_mass_tolerance = getDoubleOption_("peak_mass_tolerance");
        search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance");

        ProteinIdentification protein_identification;
        protein_identification.setDateTime(now);
        protein_identification.setSearchEngine("PILIS");
        protein_identification.setSearchEngineVersion("beta");
        protein_identification.setSearchParameters(search_parameters);
        protein_identification.setIdentifier(identifier);

        vector<ProteinIdentification> protein_identifications;
        protein_identifications.push_back(protein_identification);
        IdXMLFile().store(out, protein_identifications, ids);

        return EXECUTION_OK;
    }
コード例 #3
0
ファイル: SpecLibSearcher.C プロジェクト: BioITer/OpenMS
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    StringList in_spec = getStringList_("in");
    StringList out = getStringList_("out");
    String in_lib = getStringOption_("lib");
    String compare_function = getStringOption_("compare_function");
    Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer");
    Real precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance");
    //Int min_precursor_charge = getIntOption_("min_precursor_charge");
    //Int max_precursor_charge = getIntOption_("max_precursor_charge");
    Real remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold");
    UInt min_peaks = getIntOption_("filter:min_peaks");
    UInt max_peaks = getIntOption_("filter:max_peaks");
    Int cut_peaks_below = getIntOption_("filter:cut_peaks_below");
    StringList fixed_modifications = getStringList_("fixed_modifications");
    StringList variable_modifications = getStringList_("variable_modifications");
    Int top_hits  = getIntOption_("top_hits");
    if (top_hits < -1)
    {
      writeLog_("top_hits (should be  >= -1 )");
      return ILLEGAL_PARAMETERS;
    }
    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    if (out.size() != in_spec.size())
    {
      writeLog_("out (should be as many as input files)");
      return ILLEGAL_PARAMETERS;
    }

    time_t prog_time = time(NULL);
    MSPFile spectral_library;
    RichPeakMap query, library;
    //spectrum which will be identified
    MzMLFile spectra;
    spectra.setLogType(log_type_);

    time_t start_build_time = time(NULL);
    //-------------------------------------------------------------
    //building map for faster search
    //-------------------------------------------------------------

    //library containing already identified peptide spectra
    vector<PeptideIdentification> ids;
    spectral_library.load(in_lib, ids, library);

    map<Size, vector<PeakSpectrum> > MSLibrary;
    {
      RichPeakMap::iterator s;
      vector<PeptideIdentification>::iterator i;
      ModificationsDB * mdb = ModificationsDB::getInstance();
      for (s = library.begin(), i = ids.begin(); s < library.end(); ++s, ++i)
      {
        DoubleReal precursor_MZ = (*s).getPrecursors()[0].getMZ();
        Size MZ_multi = (Size)precursor_MZ * precursor_mass_multiplier;
        map<Size, vector<PeakSpectrum> >::iterator found;
        found = MSLibrary.find(MZ_multi);

        PeakSpectrum librar;
        bool variable_modifications_ok = true;
        bool fixed_modifications_ok = true;
        const AASequence & aaseq = i->getHits()[0].getSequence();
        //variable fixed modifications
        if (!fixed_modifications.empty())
        {
          for (Size i = 0; i < aaseq.size(); ++i)
          {
            const   Residue & mod  = aaseq.getResidue(i);
            for (Size s = 0; s < fixed_modifications.size(); ++s)
            {
              if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification())
              {
                fixed_modifications_ok = false;
                break;
              }
            }
          }
        }
        //variable modifications
        if (aaseq.isModified() && (!variable_modifications.empty()))
        {
          for (Size i = 0; i < aaseq.size(); ++i)
          {
            if (aaseq.isModified(i))
            {
              const   Residue & mod  = aaseq.getResidue(i);
              for (Size s = 0; s < variable_modifications.size(); ++s)
              {
                if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification())
                {
                  variable_modifications_ok = false;
                  break;
                }
              }
            }
          }
        }
        if (variable_modifications_ok && fixed_modifications_ok)
        {
          PeptideIdentification & translocate_pid = *i;
          librar.getPeptideIdentifications().push_back(translocate_pid);
          librar.setPrecursors(s->getPrecursors());
          //library entry transformation
          for (UInt l = 0; l < s->size(); ++l)
          {
            Peak1D peak;
            if ((*s)[l].getIntensity() >  remove_peaks_below_threshold)
            {
              const String & info = (*s)[l].getMetaValue("MSPPeakInfo");
              if (info[0] == '?')
              {
                peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity()));
              }
              else
              {
                peak.setIntensity(sqrt((*s)[l].getIntensity()));
              }

              peak.setMZ((*s)[l].getMZ());
              peak.setPosition((*s)[l].getPosition());
              librar.push_back(peak);
            }
          }
          if (found != MSLibrary.end())
          {
            found->second.push_back(librar);
          }
          else
          {
            vector<PeakSpectrum> tmp;
            tmp.push_back(librar);
            MSLibrary.insert(make_pair(MZ_multi, tmp));
          }
        }
      }
    }
    time_t end_build_time = time(NULL);
    cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n";
    //compare function
    PeakSpectrumCompareFunctor * comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function);
    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    DoubleReal score;
    StringList::iterator in, out_file;
    for (in  = in_spec.begin(), out_file  = out.begin(); in < in_spec.end(); ++in, ++out_file)
    {
      time_t start_time = time(NULL);
      spectra.load(*in, query);
      //Will hold valuable hits
      vector<PeptideIdentification> peptide_ids;
      vector<ProteinIdentification> protein_ids;
      // Write parameters to ProteinIdentifcation
      ProteinIdentification prot_id;
      //Parameters of identificaion
      prot_id.setIdentifier("test");
      prot_id.setSearchEngineVersion("SpecLibSearcher");
      prot_id.setDateTime(DateTime::now());
      prot_id.setScoreType(compare_function);
      ProteinIdentification::SearchParameters searchparam;
      searchparam.precursor_tolerance = precursor_mass_tolerance;
      prot_id.setSearchParameters(searchparam);
      /***********SEARCH**********/
      for (UInt j = 0; j < query.size(); ++j)
      {
        //Set identifier for each identifications
        PeptideIdentification pid;
        pid.setIdentifier("test");
        pid.setScoreType(compare_function);
        ProteinHit pr_hit;
        pr_hit.setAccession(j);
        prot_id.insertHit(pr_hit);
        //RichPeak1D to Peak1D transformation for the compare function query
        PeakSpectrum quer;
        bool peak_ok = true;
        query[j].sortByIntensity(true);
        DoubleReal min_high_intensity = 0;

        if (query[j].empty() || query[j].getMSLevel() != 2)
        {
          continue;
        }
        if (query[j].getPrecursors().empty())
        {
          writeLog_("Warning MS2 spectrum without precursor information");
          continue;
        }

        min_high_intensity = (1 / cut_peaks_below) * query[j][0].getIntensity();

        query[j].sortByPosition();
        for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k)
        {
          if (query[j][k].getIntensity() >  remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity)
          {
            Peak1D peak;
            peak.setIntensity(sqrt(query[j][k].getIntensity()));
            peak.setMZ(query[j][k].getMZ());
            peak.setPosition(query[j][k].getPosition());
            quer.push_back(peak);
          }
        }
        if (quer.size() >= min_peaks)
        {
          peak_ok = true;
        }
        else
        {
          peak_ok = false;
        }
        DoubleReal query_MZ = query[j].getPrecursors()[0].getMZ();
        if (peak_ok)
        {
          bool charge_one = false;
          Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0);
          Int margin  = (Int) Math::round((query[j].size() / 100.0) * 1.0);
          for (vector<RichPeak1D>::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent)
          {
            if (peak->getMZ() < query_MZ)
            {
              break;
            }
          }
          if (percent > margin)
          {
            charge_one = true;
          }
          Real min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier;
          Real max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier;
          for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz)
          {
            map<Size, vector<PeakSpectrum> >::iterator found;
            found = MSLibrary.find(mz);
            if (found != MSLibrary.end())
            {
              vector<PeakSpectrum> & library = found->second;
              for (Size i = 0; i < library.size(); ++i)
              {
                Real this_MZ  = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier;
                if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false))
                {
                  PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0];
                  PeakSpectrum & librar = library[i];
                  //Special treatment for SpectraST score as it computes a score based on the whole library
                  if (compare_function == "SpectraSTSimilarityScore")
                  {
                    SpectraSTSimilarityScore * sp = static_cast<SpectraSTSimilarityScore *>(comparor);
                    BinnedSpectrum quer_bin = sp->transform(quer);
                    BinnedSpectrum librar_bin = sp->transform(librar);
                    score = (*sp)(quer, librar);                           //(*sp)(quer_bin,librar_bin);
                    double dot_bias = sp->dot_bias(quer_bin, librar_bin, score);
                    hit.setMetaValue("DOTBIAS", dot_bias);
                  }
                  else
                  {
                    if (compare_function == "CompareFouriertransform")
                    {
                      CompareFouriertransform * ft = static_cast<CompareFouriertransform *>(comparor);
                      ft->transform(quer);
                      ft->transform(librar);
                    }
                    score = (*comparor)(quer, librar);
                  }

                  DataValue RT(library[i].getRT());
                  DataValue MZ(library[i].getPrecursors()[0].getMZ());
                  hit.setMetaValue("RT", RT);
                  hit.setMetaValue("MZ", MZ);
                  hit.setScore(score);
                  hit.addProteinAccession(pr_hit.getAccession());
                  pid.insertHit(hit);
                }
              }
            }
          }
        }
        pid.setHigherScoreBetter(true);
        pid.sort();
        if (compare_function == "SpectraSTSimilarityScore")
        {
          if (!pid.empty() && !pid.getHits().empty())
          {
            vector<PeptideHit> final_hits;
            final_hits.resize(pid.getHits().size());
            SpectraSTSimilarityScore * sp = static_cast<SpectraSTSimilarityScore *>(comparor);
            Size runner_up = 1;
            for (; runner_up < pid.getHits().size(); ++runner_up)
            {
              if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5)
              {
                break;
              }
            }
            double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore());
            for (Size s = 0; s < pid.getHits().size(); ++s)
            {
              final_hits[s] = pid.getHits()[s];
              final_hits[s].setMetaValue("delta D", delta_D);
              final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore());
              final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS")));

              //final_hits[s].removeMetaValue("DOTBIAS");
            }
            pid.setHits(final_hits);
            pid.sort();
            pid.setMetaValue("MZ", query[j].getPrecursors()[0].getMZ());
            pid.setMetaValue("RT", query_MZ);
          }
        }
        if (top_hits != -1 && (UInt)top_hits < pid.getHits().size())
        {
          vector<PeptideHit> hits;
          hits.resize(top_hits);
          for (Size i = 0; i < (UInt)top_hits; ++i)
          {
            hits[i] = pid.getHits()[i];
          }
          pid.setHits(hits);
        }
        peptide_ids.push_back(pid);
      }
      protein_ids.push_back(prot_id);
      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------
      IdXMLFile id_xml_file;
      id_xml_file.store(*out_file, protein_ids, peptide_ids);
      time_t end_time = time(NULL);
      cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n";
    }
    time_t end_time = time(NULL);
    cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n";
    return EXECUTION_OK;
  }