Exemplo n.º 1
0
  /// Tool entry point: loads a transformation from 'trafo_in', optionally
  /// re-fits its model and/or inverts it, stores it to 'trafo_out' (if given)
  /// and hands the data file 'in' (if given) over for transformation to 'out'.
  ///
  /// @return ILLEGAL_PARAMETERS if neither 'out' nor 'trafo_out' is set or if
  ///         'in' and 'out' are not used together; EXECUTION_OK otherwise.
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // read tool parameters
    //-------------------------------------------------------------
    String in = getStringOption_("in");
    String out = getStringOption_("out");
    String trafo_in = getStringOption_("trafo_in");
    String trafo_out = getStringOption_("trafo_out");

    // The "model:" section holds the model type plus one subsection per
    // type; only the subsection of the selected type is passed on.
    Param model_section = getParam_().copy("model:", true);
    String model_type = model_section.getValue("type");
    Param model_params = model_section.copy(model_type + ":", true);

    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);

    //-------------------------------------------------------------
    // validate the combination of input/output parameters
    //-------------------------------------------------------------
    const bool has_data_in = !in.empty();
    const bool has_data_out = !out.empty();
    const bool has_trafo_out = !trafo_out.empty();

    if (!has_data_out && !has_trafo_out)
    {
      writeLog_("Error: Either a data or a transformation output file has to be provided (parameters 'out'/'trafo_out')");
      return ILLEGAL_PARAMETERS;
    }
    if (has_data_in != has_data_out)
    {
      writeLog_("Error: Data input and output parameters ('in'/'out') must be used together");
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // prepare the transformation
    //-------------------------------------------------------------
    TransformationXMLFile trafoxml;
    TransformationDescription trafo;
    trafoxml.load(trafo_in, trafo);

    // "none" keeps the model as loaded from the file
    if (model_type != "none")
    {
      trafo.fitModel(model_type, model_params);
    }
    if (getFlag_("invert"))
    {
      trafo.invert();
    }
    if (has_trafo_out)
    {
      trafoxml.store(trafo_out, trafo);
    }

    //-------------------------------------------------------------
    // transform the data file (if one was given)
    //-------------------------------------------------------------
    if (!has_data_in)
    {
      return EXECUTION_OK;
    }

    switch (FileHandler::getType(in))
    {
      case FileTypes::MZML:
      {
        MzMLFile file;
        MSExperiment<> map;
        applyTransformation_(in, out, trafo, file, map);
        break;
      }

      case FileTypes::FEATUREXML:
      {
        FeatureXMLFile file;
        FeatureMap map;
        applyTransformation_(in, out, trafo, file, map);
        break;
      }

      case FileTypes::CONSENSUSXML:
      {
        ConsensusXMLFile file;
        ConsensusMap map;
        applyTransformation_(in, out, trafo, file, map);
        break;
      }

      case FileTypes::IDXML:
      {
        IdXMLFile file;
        vector<ProteinIdentification> proteins;
        vector<PeptideIdentification> peptides;
        file.load(in, proteins, peptides);
        bool store_original_rt = getFlag_("store_original_rt");
        MapAlignmentTransformer::transformRetentionTimes(peptides, trafo,
                                                         store_original_rt);
        // idXML has no "data processing" section, so nothing else is recorded
        file.store(out, proteins, peptides);
        break;
      }

      default:
        // any other file type is left untouched (no output is written)
        break;
    }

    return EXECUTION_OK;
  }
  NEW_TMP_FILE(filename)
  IdXMLFile().store(filename, protein_ids2, peptide_ids2, document_id2);

  FuzzyStringComparator fuzzy;
  fuzzy.setWhitelist(ListUtils::create<String>("<?xml-stylesheet"));
  fuzzy.setAcceptableAbsolute(0.0001);
  bool result = fuzzy.compareFiles(input_path, filename);
  TEST_EQUAL(result, true);
END_SECTION


START_SECTION([EXTRA] static bool isValid(const String& filename))
  // Checks that files written by IdXMLFile::store() are accepted by
  // IdXMLFile::isValid(); validation messages are sent to std::cerr.
  // NOTE(review): protein_ids and peptide_ids are declared but not used in
  // the visible part of this section.
  std::vector<ProteinIdentification> protein_ids, protein_ids2;
  std::vector<PeptideIdentification> peptide_ids, peptide_ids2;
  String filename;
  IdXMLFile f;

  // Case 1: a file stored from empty identification vectors must be valid
  NEW_TMP_FILE(filename)
  f.store(filename, protein_ids2, peptide_ids2);
  TEST_EQUAL(f.isValid(filename, std::cerr),true);

  // Case 2: a file with content must be valid. The reference file is loaded,
  // one string, one integer and one floating-point meta value are attached to
  // the first protein identification, and the result is stored and validated.
  NEW_TMP_FILE(filename);
  String document_id;
  f.load(OPENMS_GET_TEST_DATA_PATH("IdXMLFile_whole.idXML"), protein_ids2, peptide_ids2, document_id);
  protein_ids2[0].setMetaValue("stringvalue",String("bla"));
  protein_ids2[0].setMetaValue("intvalue",4711);
  protein_ids2[0].setMetaValue("floatvalue",5.3);
  f.store(filename, protein_ids2, peptide_ids2);
  TEST_EQUAL(f.isValid(filename, std::cerr),true);
Exemplo n.º 3
0
  /// Tool entry point: linearly rescales the "RT" meta value of all peptide
  /// identifications of an idXML file so that the two input calibrant
  /// retention times are mapped onto the two reference retention times.
  ///
  /// @return ILLEGAL_PARAMETERS if the two input (or the two reference)
  ///         calibrants are equal or a reference calibrant is -1;
  ///         EXECUTION_OK otherwise.
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    String in_file = getStringOption_("in");
    String out_file = getStringOption_("out");

    DoubleReal input_low = getDoubleOption_("calibrant_1_input");
    DoubleReal input_high = getDoubleOption_("calibrant_2_input");
    DoubleReal reference_low = getDoubleOption_("calibrant_1_reference");
    DoubleReal reference_high = getDoubleOption_("calibrant_2_reference");

    // identical calibrants would make the linear mapping degenerate
    if (input_low == input_high)
    {
      LOG_ERROR << "rt_calibrant_1_input and rt_calibrant_2_input must not have the same value";
      return ILLEGAL_PARAMETERS;
    }
    if (reference_low == reference_high)
    {
      LOG_ERROR << "rt_calibrant_1_reference and rt_calibrant_2_reference must not have the same value";
      return ILLEGAL_PARAMETERS;
    }
    // -1 is treated as "reference calibrant not provided"
    if (reference_low == -1 || reference_high == -1)
    {
      LOG_ERROR << "rt_calibrant_1_reference and rt_calibrant_2_reference must be set";
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // bring each calibrant pair into ascending order
    //-------------------------------------------------------------

    if (input_high < input_low)
    {
      const DoubleReal smaller = input_high;
      input_high = input_low;
      input_low = smaller;
    }
    if (reference_high < reference_low)
    {
      const DoubleReal smaller = reference_high;
      reference_high = reference_low;
      reference_low = smaller;
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    IdXMLFile file;
    vector<ProteinIdentification> protein_identifications;
    vector<PeptideIdentification> identifications;
    String document_id;
    file.load(in_file, protein_identifications, identifications, document_id);

    const DoubleReal input_span = input_high - input_low;
    const DoubleReal reference_span = reference_high - reference_low;

    for (vector<PeptideIdentification>::iterator id_it = identifications.begin();
         id_it != identifications.end(); ++id_it)
    {
      // identifications without an "RT" meta value are left as they are
      if (!id_it->metaValueExists("RT"))
      {
        continue;
      }

      const DoubleReal original_rt = id_it->getMetaValue("RT");
      const DoubleReal calibrated_rt =
        (original_rt - input_low) / input_span * reference_span + reference_low;
      id_it->setMetaValue("RT", calibrated_rt);
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    // NOTE(review): document_id is loaded above but not passed on here
    file.store(out_file, protein_identifications, identifications);

    return EXECUTION_OK;
  }
  /// Tool entry point: collects the search engine scores of the peptide hits
  /// of an idXML file (per engine, optionally per charge state), fits a
  /// PosteriorErrorProbabilityModel to each score set and replaces the hit
  /// scores by the probabilities computed from the fitted model.
  ///
  /// @return INPUT_FILE_EMPTY if no scores could be collected,
  ///         UNEXPECTED_RESULT if a fit failed (both only when
  ///         'ignore_bad_data' is not set); EXECUTION_OK otherwise.
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    String inputfile_name = getStringOption_("in");
    String outputfile_name = getStringOption_("out");
    Param fit_algorithm = getParam_().copy("fit_algorithm:", true);
    fit_algorithm.setValue("out_plot", getStringOption_("out_plot")); // re-assemble full param (was moved to top-level)
    bool split_charge = getFlag_("split_charge");
    bool top_hits_only = getFlag_("top_hits_only");
    double fdr_for_targets_smaller = getDoubleOption_("fdr_for_targets_smaller");
    bool target_decoy_available = false;
    bool ignore_bad_data = getFlag_("ignore_bad_data");
    bool prob_correct = getFlag_("prob_correct");

    // Set fixed e-value threshold
    smallest_e_value_ = numeric_limits<double>::denorm_min();

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    IdXMLFile file;
    vector<ProteinIdentification> protein_ids;
    vector<PeptideIdentification> peptide_ids;
    file.load(inputfile_name, protein_ids, peptide_ids);
    // score buffers, refilled for every engine (and charge) combination
    vector<double> scores;
    vector<double> decoy;
    vector<double> target;
    set<Int> charges;
    PosteriorErrorProbabilityModel PEP_model;
    PEP_model.setParameters(fit_algorithm);
    // engine names that are matched against ProteinIdentification::getSearchEngine()
    StringList search_engines = ListUtils::create<String>("XTandem,OMSSA,MASCOT,SpectraST,MyriMatch,SimTandem,MSGFPlus,MS-GF+,Comet");
    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    // when fitting per charge state: gather all charges that occur in any hit
    if (split_charge)
    {
      for (vector<PeptideIdentification>::iterator pep_it = peptide_ids.begin(); pep_it != peptide_ids.end(); ++pep_it)
      {
        vector<PeptideHit>& hits = pep_it->getHits();
        for (std::vector<PeptideHit>::iterator hit_it = hits.begin(); hit_it != hits.end(); ++hit_it)
        {
          charges.insert(hit_it->getCharge());
        }
      }
      if (charges.empty())
      {
        throw Exception::ElementNotFound(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "no charges found!");
      }
    }
    // target/decoy information is judged from the first identification that
    // has hits: it must use q-values and its top hit must carry the
    // "target_decoy" meta value
    for (vector<PeptideIdentification>::iterator pep_it = peptide_ids.begin(); pep_it != peptide_ids.end(); ++pep_it)
    {
      if (!pep_it->getHits().empty())
      {
        target_decoy_available = ((pep_it->getScoreType() == "q-value") && pep_it->getHits()[0].metaValueExists("target_decoy"));
        break;
      }
    }

    set<Int>::iterator charge_it = charges.begin(); // charges can be empty, no problem if split_charge is not set
    // NOTE(review): unreachable in practice - the same condition already throws above
    if (split_charge && charges.empty())
    {
      throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "'split_charge' is set, but the list of charge states is empty");
    }
    // key: engine name (followed by the splitter and the charge if split_charge is set);
    // value: [0] all scores, [1] target scores, [2] decoy scores
    map<String, vector<vector<double> > > all_scores;
    char splitter = ','; // to split the engine from the charge state later on
    // without split_charge 'charges' is empty, so the body runs exactly once;
    // with split_charge it runs once per charge state
    do
    {
      for (StringList::iterator engine_it = search_engines.begin(); engine_it != search_engines.end(); ++engine_it)
      {
        for (vector<ProteinIdentification>::iterator prot_it = protein_ids.begin(); prot_it != protein_ids.end(); ++prot_it)
        {
          String searchengine = prot_it->getSearchEngine();
          if ((*engine_it == searchengine) || (*engine_it == searchengine.toUpper()))
          {
            // peptide identifications belong to this run if the identifiers match
            for (vector<PeptideIdentification>::iterator pep_it = peptide_ids.begin(); pep_it != peptide_ids.end(); ++pep_it)
            {
              if (prot_it->getIdentifier() == pep_it->getIdentifier())
              {
                vector<PeptideHit>& hits = pep_it->getHits();
                if (top_hits_only)
                {
                  // sort first so that hits[0] is the top hit
                  pep_it->sort();
                  if (!hits.empty() && (!split_charge || hits[0].getCharge() == *charge_it))
                  {
                    double score = getScore_(*engine_it, hits[0]);
                    if (!boost::math::isnan(score)) // issue #740: ignore scores with 0 values, otherwise you will get the error "unable to fit data"
                    {
                      scores.push_back(score);

                      // hits whose current score lies below the threshold count as
                      // targets, all others as decoys (used for plotting only)
                      if (target_decoy_available)
                      {
                        if (hits[0].getScore() < fdr_for_targets_smaller)
                        {
                          target.push_back(score);
                        }
                        else
                        {
                          decoy.push_back(score);
                        }
                      }
                    }
                  }
                }
                else
                {
                  for (std::vector<PeptideHit>::iterator hit_it = hits.begin(); hit_it != hits.end(); ++hit_it)
                  {
                    if (!split_charge || (hit_it->getCharge() == *charge_it))
                    {
                      double score = getScore_(*engine_it, *hit_it);
                      if (!boost::math::isnan(score)) // issue #740: ignore scores with 0 values, otherwise you will get the error "unable to fit data"
                      {
                        scores.push_back(score);
                      }
                    }
                  }
                }
              }
            }
          }
        }
        // only engine/charge combinations with more than two scores are kept for fitting
        if (scores.size() > 2)
        {
          vector<vector<double> > tmp;
          tmp.push_back(scores);
          tmp.push_back(target);
          tmp.push_back(decoy);
          if (split_charge)
          {
            String engine_with_charge_state = *engine_it + String(splitter) + String(*charge_it);
            all_scores.insert(make_pair(engine_with_charge_state, tmp));
          }
          else
          {
            all_scores.insert(make_pair(*engine_it, tmp));
          }
        }

        // reset the buffers for the next engine
        scores.clear();
        target.clear();
        decoy.clear();
      }

      if (split_charge) ++charge_it;
    }
    while (charge_it != charges.end());

    if (all_scores.empty())
    {
      writeLog_("No data collected. Check whether search engine is supported.");
      // with 'ignore_bad_data' the (unchanged) identifications are still written below
      if (!ignore_bad_data) return INPUT_FILE_EMPTY;
    }

    String out_plot = fit_algorithm.getValue("out_plot").toString().trim();
    // fit one model per collected score set and apply it to the matching hits
    for (map<String, vector<vector<double> > >::iterator score_it = all_scores.begin(); score_it != all_scores.end(); ++score_it)
    {
      // recover engine name and (optional) charge from the map key
      vector<String> engine_info;
      score_it->first.split(splitter, engine_info);
      String engine = engine_info[0];
      Int charge = -1;
      if (engine_info.size() == 2)
      {
        charge = engine_info[1].toInt();
      }
      if (split_charge)
      {
        // only adapt plot output if plot is requested (this badly violates the output rules and needs to change!)
        // one way to fix this: plot charges into a single file (no renaming of output file needed) - but this requires major code restructuring
        if (!out_plot.empty()) fit_algorithm.setValue("out_plot", out_plot + "_charge_" + String(charge));
        PEP_model.setParameters(fit_algorithm);
      }

      const bool return_value = PEP_model.fit(score_it->second[0]);
      if (!return_value) writeLog_("Unable to fit data. Algorithm did not run through for the following search engine: " + engine);
      if (!return_value && !ignore_bad_data) return UNEXPECTED_RESULT;

      if (return_value)
      {
        // plot target_decoy
        if (!out_plot.empty() && top_hits_only && target_decoy_available && (score_it->second[0].size() > 0))
        {
          PEP_model.plotTargetDecoyEstimation(score_it->second[1], score_it->second[2]); //target, decoy
        }

        // both flags are cleared as soon as one computed probability falls
        // strictly inside the respective interval
        bool unable_to_fit_data = true;
        bool data_might_not_be_well_fit = true;
        for (vector<ProteinIdentification>::iterator prot_it = protein_ids.begin(); prot_it != protein_ids.end(); ++prot_it)
        {
          String searchengine = prot_it->getSearchEngine();
          if ((engine == searchengine) || (engine == searchengine.toUpper()))
          {
            for (vector<PeptideIdentification>::iterator pep_it = peptide_ids.begin(); pep_it != peptide_ids.end(); ++pep_it)
            {
              if (prot_it->getIdentifier() == pep_it->getIdentifier())
              {
                // meta value name under which the previous score of each hit is kept
                String score_type = pep_it->getScoreType() + "_score";
                // work on a copy of the hits; it is written back with setHits() below
                vector<PeptideHit> hits = pep_it->getHits();
                for (std::vector<PeptideHit>::iterator hit_it = hits.begin(); hit_it != hits.end(); ++hit_it)
                {
                  if (!split_charge || (hit_it->getCharge() == charge))
                  {
                    double score;
                    hit_it->setMetaValue(score_type, hit_it->getScore());

                    score = getScore_(engine, *hit_it);
                    if (boost::math::isnan(score)) // issue #740: ignore scores with 0 values, otherwise you will get the error "unable to fit data"
                    {
                      // hits without a usable score get an error probability of 1
                      score = 1.0;
                    }
                    else 
                    { 
                      score = PEP_model.computeProbability(score);
                      if ((score > 0.0) && (score < 1.0)) unable_to_fit_data = false;  // only if all it->second[0] are 0 or 1 unable_to_fit_data stays true
                      if ((score > 0.2) && (score < 0.8)) data_might_not_be_well_fit = false;  //same as above
                    }
                    // NOTE(review): this assignment is overwritten by the if/else right below
                    hit_it->setScore(score);
                    if (prob_correct)
                    {
                      // report the probability of being correct instead of the error probability
                      hit_it->setScore(1.0 - score);
                    }
                    else
                    {
                      hit_it->setScore(score);
                    }
                  }
                }
                pep_it->setHits(hits);
              }
              // NOTE(review): this runs for every peptide identification, not only
              // for those whose identifier matched above
              if (prob_correct)
              {
                pep_it->setScoreType("Posterior Probability");
                pep_it->setHigherScoreBetter(true);
              }
              else
              {
                pep_it->setScoreType("Posterior Error Probability");
                pep_it->setHigherScoreBetter(false);
              }
            }
          }
        }
        if (unable_to_fit_data) writeLog_(String("Unable to fit data for search engine: ") + engine);
        if (unable_to_fit_data && !ignore_bad_data) return UNEXPECTED_RESULT;

        if (data_might_not_be_well_fit) writeLog_(String("Data might not be well fitted for search engine: ") + engine);
      }
    }
    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    file.store(outputfile_name, protein_ids, peptide_ids);
    return EXECUTION_OK;
  }
Exemplo n.º 5
0
  /**
    @brief Merges two idXML files, one derived from a pepXML and one derived from a protXML file.

    The file whose first protein identification contains protein groups is
    treated as the protXML-derived one. Peptide identifications and protein
    identification runs are taken from the pepXML-derived file; protein
    groups, indistinguishable proteins, score type/orientation, significance
    threshold and per-accession score/coverage are copied over from the
    protXML-derived file. Protein hits without a counterpart in the protXML
    data get a score of -1.

    @param filenames Paths of the two idXML files (any order); entries beyond the second are ignored
    @param proteins Output: merged protein identifications
    @param peptides Output: peptide identifications of the pepXML-derived file

    @throws Exception::InvalidParameter if fewer than two files are given, if
            a file needed for the decision contains no 'ProteinIdentification',
            if neither file contains protein groups, or if the protXML-derived
            file contains more than one protein/peptide identification.
  */
  void mergePepXMLProtXML_(StringList filenames, vector<ProteinIdentification>&
                           proteins, vector<PeptideIdentification>& peptides)
  {
    // both files are indexed below, so make sure they were actually given
    if (filenames.size() < 2)
    {
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Two idXML input files (one derived from a pepXML and one derived from a protXML file) are required for merging.");
    }

    IdXMLFile idxml;
    idxml.load(filenames[0], proteins, peptides);
    // the first protein identification decides which kind of file this is
    if (proteins.empty())
    {
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "The first input file does not contain any 'ProteinIdentification' instance.");
    }

    vector<ProteinIdentification> pepxml_proteins, protxml_proteins;
    vector<PeptideIdentification> pepxml_peptides, protxml_peptides;

    if (proteins[0].getProteinGroups().empty()) // first idXML contains data from the pepXML
    {
      proteins.swap(pepxml_proteins);
      peptides.swap(pepxml_peptides);
      idxml.load(filenames[1], protxml_proteins, protxml_peptides);
      if (protxml_proteins.empty())
      {
        throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "The second input file does not contain any 'ProteinIdentification' instance.");
      }
      if (protxml_proteins[0].getProteinGroups().empty())
      {
        throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "None of the input files seems to be derived from a protXML file (information about protein groups is missing).");
      }
    }
    else // first idXML contains data from the protXML
    {
      proteins.swap(protxml_proteins);
      peptides.swap(protxml_peptides);
      idxml.load(filenames[1], pepxml_proteins, pepxml_peptides);
    }

    // from here on 'protxml_proteins' is guaranteed to be non-empty
    if ((protxml_peptides.size() > 1) || (protxml_proteins.size() > 1))
    {
      throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "The idXML derived from a protXML file should contain only one 'ProteinIdentification' and one 'PeptideIdentification' instance.");
    }

    // peptide information comes from the pepXML (additional information in
    // the protXML - adapted peptide hit score, "is_unique", "is_contributing"
    // - is not transferred):
    peptides.swap(pepxml_peptides);

    // prepare scores and coverage values of protein hits from the protXML
    // (accession -> (score, coverage)):
    map<String, pair<DoubleReal, DoubleReal> > hit_values;
    ProteinIdentification & protein = protxml_proteins[0];
    for (vector<ProteinHit>::iterator hit_it = protein.getHits().begin();
         hit_it != protein.getHits().end(); ++hit_it)
    {
      hit_values[hit_it->getAccession()] = make_pair(hit_it->getScore(),
                                                     hit_it->getCoverage());
    }

    // merge protein information:
    proteins.swap(pepxml_proteins);
    for (vector<ProteinIdentification>::iterator prot_it = proteins.begin();
         prot_it != proteins.end(); ++prot_it)
    {
      prot_it->getProteinGroups() = protein.getProteinGroups();
      prot_it->getIndistinguishableProteins() =
        protein.getIndistinguishableProteins();
      // TODO: since a protXML file can integrate data from several protein
      // identification runs, the protein groups/indistinguishable proteins
      // that we write to one identification run could contain references to
      // proteins that are not observed in this run, but in others; also, some
      // protein hits without enough evidence may not occur in the protXML
      // (thus also not in the protein groups) - clean this up?

      prot_it->setScoreType(protein.getScoreType());
      prot_it->setHigherScoreBetter(protein.isHigherScoreBetter());
      prot_it->setSignificanceThreshold(protein.getSignificanceThreshold());

      for (vector<ProteinHit>::iterator hit_it = prot_it->getHits().begin();
           hit_it != prot_it->getHits().end(); ++hit_it)
      {
        map<String, pair<DoubleReal, DoubleReal> >::const_iterator pos =
          hit_values.find(hit_it->getAccession());
        if (pos == hit_values.end())
        {
          // protein hit not present in the protXML data: mark with score -1
          hit_it->setScore(-1);
        }
        else
        {
          hit_it->setScore(pos->second.first);
          hit_it->setCoverage(pos->second.second);
        }
      }
    }
  }
Exemplo n.º 6
0
	NEW_TMP_FILE(filename)
	IdXMLFile().store(filename, protein_ids2, peptide_ids2, document_id2);

  FuzzyStringComparator fuzzy;
  fuzzy.setWhitelist(StringList::create("<?xml-stylesheet"));
  fuzzy.setAcceptableAbsolute(0.0001);
  bool result = fuzzy.compareFiles(input_path, filename);
  TEST_EQUAL(result, true);
END_SECTION


START_SECTION([EXTRA] static bool isValid(const String& filename))
	// Checks that files written by IdXMLFile::store() are accepted by
	// IdXMLFile::isValid().
	// NOTE(review): protein_ids and peptide_ids are declared but not used in
	// the visible part of this section.
	std::vector<ProteinIdentification> protein_ids, protein_ids2;
	std::vector<PeptideIdentification> peptide_ids, peptide_ids2;
	String filename;
	IdXMLFile f;

  // Case 1: a file stored from empty identification vectors must be valid
	NEW_TMP_FILE(filename)
	f.store(filename, protein_ids2, peptide_ids2);	
  TEST_EQUAL(f.isValid(filename),true);	
	
	// Case 2: a file with content must be valid. The reference file is loaded,
	// one string, one integer and one floating-point meta value are attached to
	// the first protein identification, and the result is stored and validated.
	NEW_TMP_FILE(filename);
	String document_id;
	f.load(OPENMS_GET_TEST_DATA_PATH("IdXMLFile_whole.idXML"), protein_ids2, peptide_ids2, document_id);
	protein_ids2[0].setMetaValue("stringvalue",String("bla"));
	protein_ids2[0].setMetaValue("intvalue",4711);
	protein_ids2[0].setMetaValue("floatvalue",5.3);
	f.store(filename, protein_ids2, peptide_ids2);	
  TEST_EQUAL(f.isValid(filename),true);
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    StringList id_in(getStringList_("id_in"));
    StringList in_raw(getStringList_("in"));
    Size number_of_bins((UInt)getIntOption_("number_of_bins"));
    bool precursor_error_ppm(getFlag_("precursor_error_ppm"));
    bool fragment_error_ppm(getFlag_("fragment_error_ppm"));
    bool generate_gnuplot_scripts(DataValue(getStringOption_("generate_gnuplot_scripts")).toBool());

    if (in_raw.size() != id_in.size())
    {
      writeLog_("Number of spectrum files and identification files differs...");
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    vector<vector<PeptideIdentification> > pep_ids;
    vector<vector<ProteinIdentification> > prot_ids;
    pep_ids.resize(id_in.size());
    prot_ids.resize(id_in.size());

    IdXMLFile idxmlfile;
    for (Size i = 0; i != id_in.size(); ++i)
    {
      String doc_id;
      idxmlfile.load(id_in[i], prot_ids[i], pep_ids[i], doc_id);
    }

    // read mzML files
    vector<RichPeakMap> maps_raw;
    maps_raw.resize(in_raw.size());

    MzMLFile mzml_file;
    for (Size i = 0; i != in_raw.size(); ++i)
    {
      mzml_file.load(in_raw[i], maps_raw[i]);
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    // mapping ids
    IDMapper mapper;
    for (Size i = 0; i != maps_raw.size(); ++i)
    {
      mapper.annotate(maps_raw[i], pep_ids[i], prot_ids[i]);
    }

    // normalize the spectra
    Normalizer normalizer;
    for (vector<RichPeakMap>::iterator it1 = maps_raw.begin(); it1 != maps_raw.end(); ++it1)
    {
      for (RichPeakMap::Iterator it2 = it1->begin(); it2 != it1->end(); ++it2)
      {
        normalizer.filterSpectrum(*it2);
      }
    }

    // generate precursor statistics
    vector<MassDifference> precursor_diffs;
    if (getStringOption_("precursor_out") != "")
    {
      for (Size i = 0; i != maps_raw.size(); ++i)
      {
        for (Size j = 0; j != maps_raw[i].size(); ++j)
        {
          if (maps_raw[i][j].getPeptideIdentifications().empty())
          {
            continue;
          }
          for (vector<PeptideIdentification>::const_iterator it = maps_raw[i][j].getPeptideIdentifications().begin(); it != maps_raw[i][j].getPeptideIdentifications().end(); ++it)
          {
            if (it->getHits().size() > 0)
            {
              PeptideHit hit = *it->getHits().begin();
              MassDifference md;
              Int charge = hit.getCharge();
              if (charge == 0)
              {
                charge = 1;
              }
              md.exp_mz = it->getMZ();
              md.theo_mz = (hit.getSequence().getMonoWeight() + (double)charge * Constants::PROTON_MASS_U) / (double)charge;
              md.charge = charge;
              precursor_diffs.push_back(md);
            }
          }
        }
      }
    }

    // generate fragment ions statistics
    vector<MassDifference> fragment_diffs;
    TheoreticalSpectrumGenerator tsg;
    SpectrumAlignment sa;
    double fragment_mass_tolerance(getDoubleOption_("fragment_mass_tolerance"));
    Param sa_param(sa.getParameters());
    sa_param.setValue("tolerance", fragment_mass_tolerance);
    sa.setParameters(sa_param);

    if (getStringOption_("fragment_out") != "")
    {
      for (Size i = 0; i != maps_raw.size(); ++i)
      {
        for (Size j = 0; j != maps_raw[i].size(); ++j)
        {
          if (maps_raw[i][j].getPeptideIdentifications().empty())
          {
            continue;
          }
          for (vector<PeptideIdentification>::const_iterator it = maps_raw[i][j].getPeptideIdentifications().begin(); it != maps_raw[i][j].getPeptideIdentifications().end(); ++it)
          {
            if (it->getHits().size() > 0)
            {
              PeptideHit hit = *it->getHits().begin();

              RichPeakSpectrum theo_spec;
              tsg.addPeaks(theo_spec, hit.getSequence(), Residue::YIon);
              tsg.addPeaks(theo_spec, hit.getSequence(), Residue::BIon);

              vector<pair<Size, Size> > pairs;
              sa.getSpectrumAlignment(pairs, theo_spec, maps_raw[i][j]);
              //cerr << hit.getSequence() << " " << hit.getSequence().getSuffix(1).getFormula() << " " << hit.getSequence().getSuffix(1).getFormula().getMonoWeight() << endl;
              for (vector<pair<Size, Size> >::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit)
              {
                MassDifference md;
                md.exp_mz = maps_raw[i][j][pit->second].getMZ();
                md.theo_mz = theo_spec[pit->first].getMZ();
                //cerr.precision(15);
                //cerr << md.exp_mz << " " << md.theo_mz << " " << md.exp_mz - md.theo_mz << endl;
                md.intensity = maps_raw[i][j][pit->second].getIntensity();
                md.charge = hit.getCharge();
                fragment_diffs.push_back(md);
              }
            }
          }
        }
      }
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    String precursor_out_file(getStringOption_("precursor_out"));
    if (precursor_out_file != "")
    {
      vector<double> errors;
      ofstream precursor_out(precursor_out_file.c_str());
      double min_diff(numeric_limits<double>::max()), max_diff(numeric_limits<double>::min());
      for (Size i = 0; i != precursor_diffs.size(); ++i)
      {
        double diff = getMassDifference(precursor_diffs[i].theo_mz, precursor_diffs[i].exp_mz, precursor_error_ppm);
        precursor_out << diff << "\n";
        errors.push_back(diff);

        if (diff > max_diff)
        {
          max_diff = diff;
        }
        if (diff < min_diff)
        {
          min_diff = diff;
        }
      }
      precursor_out.close();

      // fill histogram with the collected values
      double bin_size = (max_diff - min_diff) / (double)number_of_bins;
      Histogram<double, double> hist(min_diff, max_diff, bin_size);
      for (Size i = 0; i != errors.size(); ++i)
      {
        hist.inc(errors[i], 1.0);
      }

      writeDebug_("min_diff=" + String(min_diff) + ", max_diff=" + String(max_diff) + ", number_of_bins=" + String(number_of_bins), 1);

      // transform the histogram into a vector<DPosition<2> > for the fitting
      vector<DPosition<2> > values;
      for (Size i = 0; i != hist.size(); ++i)
      {
        DPosition<2> p;
        p.setX((double)i / (double)number_of_bins * (max_diff - min_diff) + min_diff);
        p.setY(hist[i]);
        values.push_back(p);
      }

      double mean = Math::mean(errors.begin(), errors.end());
      double abs_dev = Math::absdev(errors.begin(), errors.end(), mean);
      double sdv = Math::sd(errors.begin(), errors.end(), mean);
      sort(errors.begin(), errors.end());
      double median = errors[(Size)(errors.size() / 2.0)];

      writeDebug_("Precursor mean error: " + String(mean), 1);
      writeDebug_("Precursor abs. dev.:  " + String(abs_dev), 1);
      writeDebug_("Precursor std. dev.:  " + String(sdv), 1);
      writeDebug_("Precursor median error:  " + String(median), 1);


      // calculate histogram for gauss fitting
      GaussFitter gf;
      GaussFitter::GaussFitResult init_param (hist.maxValue(), median, sdv/500.0);
      gf.setInitialParameters(init_param);

      try
      {
        gf.fit(values);

        // write gnuplot scripts
        if (generate_gnuplot_scripts)
        {
          ofstream out(String(precursor_out_file + "_gnuplot.dat").c_str());
          for (vector<DPosition<2> >::const_iterator it = values.begin(); it != values.end(); ++it)
          {
            out << it->getX() << " " << it->getY() << endl;
          }
          out.close();

          ofstream gpl_out(String(precursor_out_file + "_gnuplot.gpl").c_str());
          gpl_out << "set terminal png" << endl;
          gpl_out << "set output \"" << precursor_out_file  << "_gnuplot.png\"" << endl;
          if (precursor_error_ppm)
          {
            gpl_out << "set xlabel \"error in ppm\"" << endl;
          }
          else
          {
            gpl_out << "set xlabel \"error in Da\"" << endl;
          }
          gpl_out << "set ylabel \"frequency\"" << endl;
          gpl_out << "plot '" << precursor_out_file << "_gnuplot.dat' title 'Precursor mass error distribution' w boxes, f(x) w lp title 'Gaussian fit of the error distribution'" << endl;
          gpl_out.close();
        }

      }
      catch (Exception::UnableToFit)
      {
        writeLog_("Unable to fit a Gaussian distribution to the precursor mass errors");
      }
    }

    String fragment_out_file(getStringOption_("fragment_out"));
    if (fragment_out_file != "")
    {
      vector<double> errors;
      ofstream fragment_out(fragment_out_file.c_str());
      double min_diff(numeric_limits<double>::max()), max_diff(numeric_limits<double>::min());
      for (Size i = 0; i != fragment_diffs.size(); ++i)
      {
        double diff = getMassDifference(fragment_diffs[i].theo_mz, fragment_diffs[i].exp_mz, fragment_error_ppm);
        fragment_out << diff << endl;
        errors.push_back(diff);

        if (diff > max_diff)
        {
          max_diff = diff;
        }
        if (diff < min_diff)
        {
          min_diff = diff;
        }
      }
      fragment_out.close();

      // fill histogram with the collected values
      // here we use the intensities to scale the error
      // low intensity peaks are likely to be random matches
      double bin_size = (max_diff - min_diff) / (double)number_of_bins;
      Histogram<double, double> hist(min_diff, max_diff, bin_size);
      for (Size i = 0; i != fragment_diffs.size(); ++i)
      {
        double diff = getMassDifference(fragment_diffs[i].theo_mz, fragment_diffs[i].exp_mz, fragment_error_ppm);
        hist.inc(diff, fragment_diffs[i].intensity);
      }

      writeDebug_("min_diff=" + String(min_diff) + ", max_diff=" + String(max_diff) + ", number_of_bins=" + String(number_of_bins), 1);

      // transform the histogram into a vector<DPosition<2> > for the fitting
      vector<DPosition<2> > values;
      for (Size i = 0; i != hist.size(); ++i)
      {
        DPosition<2> p;
        p.setX((double)i / (double)number_of_bins * (max_diff - min_diff) + min_diff);
        p.setY(hist[i]);
        values.push_back(p);
      }

      double mean = Math::mean(errors.begin(), errors.end());
      double abs_dev = Math::absdev(errors.begin(), errors.end(), mean);
      double sdv = Math::sd(errors.begin(), errors.end(), mean);
      sort(errors.begin(), errors.end());
      double median = errors[(Size)(errors.size() / 2.0)];

      writeDebug_("Fragment mean error:  " + String(mean), 1);
      writeDebug_("Fragment abs. dev.:   " + String(abs_dev), 1);
      writeDebug_("Fragment std. dev.:   " + String(sdv), 1);
      writeDebug_("Fragment median error:   " + String(median), 1);

      // calculate histogram for gauss fitting
      GaussFitter gf;
      GaussFitter::GaussFitResult init_param (hist.maxValue(), median, sdv / 100.0);
      gf.setInitialParameters(init_param);

      try
      {
        gf.fit(values);


        // write gnuplot script
        if (generate_gnuplot_scripts)
        {
          ofstream out(String(fragment_out_file + "_gnuplot.dat").c_str());
          for (vector<DPosition<2> >::const_iterator it = values.begin(); it != values.end(); ++it)
          {
            out << it->getX() << " " << it->getY() << endl;
          }
          out.close();

          ofstream gpl_out(String(fragment_out_file + "_gnuplot.gpl").c_str());
          gpl_out << "set terminal png" << endl;
          gpl_out << "set output \"" << fragment_out_file  << "_gnuplot.png\"" << endl;
          if (fragment_error_ppm)
          {
            gpl_out << "set xlabel \"error in ppm\"" << endl;
          }
          else
          {
            gpl_out << "set xlabel \"error in Da\"" << endl;
          }
          gpl_out << "set ylabel \"frequency\"" << endl;
          gpl_out << "plot '" << fragment_out_file << "_gnuplot.dat' title 'Fragment mass error distribution' w boxes, f(x) w lp title 'Gaussian fit of the error distribution'" << endl;
          gpl_out.close();
        }
      }
      catch (Exception::UnableToFit)
      {
        writeLog_("Unable to fit a Gaussian distribution to the fragment mass errors");
      }
    }

    return EXECUTION_OK;
  }
  /**
    @brief Trains a PILIS model on identified spectra and optionally writes it to disk.

    Spectra are read from the files given by 'in' (MSP or mzML, decided by the
    type of the first file); identifications given by 'id_in' are mapped onto
    the spectra. For every spectrum the first hit of the first peptide
    identification is used for training if it passes the modification,
    length, score and charge filters.

    @return INCOMPATIBLE_INPUT_DATA if no spectra files are given or the number
            of identification files differs from the number of spectra files,
            EXECUTION_OK otherwise.
  */
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    // input/output files and filter settings
    StringList spectra_files = getStringList_("in");
    StringList id_files = getStringList_("id_in");
    String trained_model_file = getStringOption_("trained_model_file");
    String model_file = getStringOption_("model_file");
    bool score_filtering = getFlag_("score_filtering");
    double score_threshold = getDoubleOption_("score_threshold");
    Int min_charge = getIntOption_("min_charge");
    Int max_charge = getIntOption_("max_charge");

    if (spectra_files.empty())
    {
      writeLog_("For 'training' mode spectra and identifications are needed.");
      return INCOMPATIBLE_INPUT_DATA;
    }

    //bool duplicates_by_tic(getFlag_("duplicates_by_tic"));
    //bool base_model_from_file(getFlag_("base_model_from_file"));

    //-------------------------------------------------------------
    // set up the model: start from defaults or continue from a model file
    //-------------------------------------------------------------
    PILISModel model;
    if (model_file.empty())
    {
      writeDebug_("Initializing model", 1);
      model.setParameters(getParam_().copy("PILIS_parameters:", true));
      model.init();
    }
    else
    {
      writeDebug_("Reading model from file '" + model_file + "'", 1);
      model.readFromFile(model_file);
    }

    Param pilis_param = model.getParameters();
    ModificationDefinitionsSet mod_set(pilis_param.getValue("fixed_modifications"), pilis_param.getValue("variable_modifications"));

    //-------------------------------------------------------------
    // read the spectra (the list is known to be non-empty at this point)
    //-------------------------------------------------------------
    vector<RichPeakMap> exp;
    vector<vector<ProteinIdentification> > prot_ids;
    vector<vector<PeptideIdentification> > pep_ids;

    // only the first file decides how all files are read
    FileTypes::Type in_file_type = FileHandler().getType(spectra_files[0]);
    writeDebug_("File type of parameter 'in' estimated as '" + FileTypes::typeToName(in_file_type) + "'", 1);
    // TODO check all types
    if (in_file_type == FileTypes::MSP)
    {
      writeDebug_("Reading MSP file", 1);
      MSPFile msp_file;
      exp.resize(spectra_files.size());
      pep_ids.resize(spectra_files.size());
      for (Size file_idx = 0; file_idx < spectra_files.size(); ++file_idx)
      {
        msp_file.load(spectra_files[file_idx], pep_ids[file_idx], exp[file_idx]);
        // attach the identification with the same index to each loaded spectrum
        for (Size spec_idx = 0; spec_idx < exp[file_idx].size(); ++spec_idx)
        {
          exp[file_idx][spec_idx].getPeptideIdentifications().push_back(pep_ids[file_idx][spec_idx]);
        }
      }
    }
    else if (in_file_type == FileTypes::MZML)
    {
      MzMLFile mzml_file;
      mzml_file.setLogType(log_type_);

      exp.resize(spectra_files.size());
      for (Size file_idx = 0; file_idx < spectra_files.size(); ++file_idx)
      {
        mzml_file.load(spectra_files[file_idx], exp[file_idx]);
      }
    }

    //-------------------------------------------------------------
    // read the identifications and map them onto the spectra
    //-------------------------------------------------------------
    if (!id_files.empty())
    {
      prot_ids.resize(id_files.size());
      pep_ids.resize(id_files.size());
      IdXMLFile idxml_file;
      for (Size file_idx = 0; file_idx < id_files.size(); ++file_idx)
      {
        idxml_file.load(id_files[file_idx], prot_ids[file_idx], pep_ids[file_idx]);
      }

      // identification file i is mapped onto spectra file i, so the counts must agree
      if (id_files.size() != spectra_files.size())
      {
        writeLog_("If in parameter contains mzML files and id_in contains idXML files, the number should be equal to allow mapping of the identification to the spectra");
        return INCOMPATIBLE_INPUT_DATA;
      }

      IDMapper id_mapper;
      for (Size file_idx = 0; file_idx < exp.size(); ++file_idx)
      {
        id_mapper.annotate(exp[file_idx], pep_ids[file_idx], prot_ids[file_idx]);
      }
    }

    //-------------------------------------------------------------
    // collect the training peptides
    //-------------------------------------------------------------
    vector<PILISCrossValidation::Peptide> peptides;

    for (vector<RichPeakMap>::const_iterator map_it = exp.begin(); map_it != exp.end(); ++map_it)
    {
      for (RichPeakMap::ConstIterator spec_it = map_it->begin(); spec_it != map_it->end(); ++spec_it)
      {
        const vector<PeptideIdentification>& ids = spec_it->getPeptideIdentifications();
        if (ids.empty())
        {
          continue;
        }

        // only the first identification and its first hit are considered
        const PeptideIdentification& first_id = ids.front();
        const vector<PeptideHit>& hits = first_id.getHits();
        if (hits.empty())
        {
          continue;
        }
        PeptideHit hit = hits.front();

        // skip sequences carrying a modification that is not modelled
        if (!mod_set.isCompatible(hit.getSequence()))
        {
          continue;
        }
        // skip sequences longer than the model's 'visible_model_depth'
        if (hit.getSequence().size() > (UInt)pilis_param.getValue("visible_model_depth"))
        {
          continue;
        }

        // skip hits on the wrong side of the threshold, respecting score orientation
        if (score_filtering)
        {
          const bool higher_is_better = first_id.isHigherScoreBetter();
          if ((higher_is_better && hit.getScore() < score_threshold) ||
              (!higher_is_better && hit.getScore() > score_threshold))
          {
            continue;
          }
        }

        PILISCrossValidation::Peptide pep_struct;
        pep_struct.charge = hit.getCharge();

        // skip charge states outside [min_charge, max_charge]
        if (pep_struct.charge < min_charge || pep_struct.charge > max_charge)
        {
          continue;
        }

        pep_struct.sequence = hit.getSequence();
        pep_struct.spec = *spec_it;
        pep_struct.hits = hits;

        peptides.push_back(pep_struct);
      }
    }


    getUniquePeptides(peptides);
    writeDebug_("Number of (unique) peptides for training: " + String(peptides.size()), 1);

    //model.writeToFile("pilis_tmp.dat");

    //-------------------------------------------------------------
    // train, evaluate and store the model
    //-------------------------------------------------------------
    model.setParameters(pilis_param);
    for (Size pep_idx = 0; pep_idx < peptides.size(); ++pep_idx)
    {
      const PILISCrossValidation::Peptide& pep = peptides[pep_idx];
      model.train(pep.spec, pep.sequence, pep.charge);
    }
    model.evaluate();

    if (!trained_model_file.empty())
    {
      model.writeToFile(trained_model_file);
    }


    return EXECUTION_OK;
  }
Exemplo n.º 9
0
  /**
    @brief Runs PeptideIndexing on an idXML file against a FASTA database and stores the result.

    The database given by 'fasta' is used directly if it is readable; otherwise
    it is looked up via File::findDatabase(). The output file is written before
    the indexer's exit code is translated into the tool's exit code.

    @return ILLEGAL_PARAMETERS if the database cannot be located,
            INPUT_FILE_EMPTY if the indexer reports an empty database,
            UNEXPECTED_RESULT if the indexer reports an unexpected result,
            UNKNOWN_ERROR for any other indexer failure,
            EXECUTION_OK otherwise (including an input without peptide ids).
  */
  ExitCodes main_(int, const char**) override
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    const String id_in_file = getStringOption_("in");
    const String id_out_file = getStringOption_("out");

    // overlay the tool parameters onto the indexer defaults
    PeptideIndexing indexer;
    Param tool_params = getParam_().copy("", true);
    Param indexer_params = indexer.getParameters();
    indexer_params.update(tool_params, false, Log_debug); // suppress param. update message
    indexer.setParameters(indexer_params);
    indexer.setLogType(this->log_type_);

    // fall back to the database search if the given path is not readable
    String db_name = getStringOption_("fasta");
    if (!File::readable(db_name))
    {
      try
      {
        db_name = File::findDatabase(db_name);
      }
      catch (...)
      {
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
    }


    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    // we stream the Fasta file
    std::vector<ProteinIdentification> prot_ids;
    std::vector<PeptideIdentification> pep_ids;

    IdXMLFile idxmlfile;
    idxmlfile.setLogType(this->log_type_);
    idxmlfile.load(id_in_file, prot_ids, pep_ids);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    FASTAContainer<TFI_File> proteins(db_name);
    const PeptideIndexing::ExitCodes indexer_exit = indexer.run(proteins, prot_ids, pep_ids);

    //-------------------------------------------------------------
    // calculate protein coverage
    //-------------------------------------------------------------

    if (tool_params.getValue("write_protein_sequence").toBool())
    {
      for (ProteinIdentification& prot_id : prot_ids)
      {
        prot_id.computeCoverage(pep_ids);
      }
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    idxmlfile.store(id_out_file, prot_ids, pep_ids);

    // translate the indexer's exit code into the tool's exit code
    switch (indexer_exit)
    {
      case PeptideIndexing::DATABASE_EMPTY:
        return INPUT_FILE_EMPTY;

      case PeptideIndexing::UNEXPECTED_RESULT:
        return UNEXPECTED_RESULT;

      case PeptideIndexing::EXECUTION_OK:
      case PeptideIndexing::PEPTIDE_IDS_EMPTY:
        return EXECUTION_OK;

      default:
        return UNKNOWN_ERROR;
    }
  }
Exemplo n.º 10
0
  /**
    @brief Fits a posterior error probability model to the scores of an idXML file
           and replaces the peptide hit scores by the model's probabilities.

    Scores are collected per supported search engine and, if 'split_charge' is
    set, additionally per charge state. One model is fitted per collected score
    set. For every hit that is rescored, the previous score is kept as a meta
    value named "<score type>_score".

    @return INPUT_FILE_EMPTY if no scores were collected and 'ignore_bad_data' is not set,
            UNEXPECTED_RESULT if a fit fails or yields no probability strictly
            between 0 and 1 and 'ignore_bad_data' is not set,
            EXECUTION_OK otherwise.

    @throws Exception::ElementNotFound if 'split_charge' is set but no hit provides a charge.
  */
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    String inputfile_name = getStringOption_("in");
    String outputfile_name = getStringOption_("out");
    smallest_e_value_ = getDoubleOption_("smallest_e_value");
    Param fit_algorithm = getParam_().copy("fit_algorithm:", true);
    bool split_charge = getFlag_("split_charge");
    bool top_hits_only = getFlag_("top_hits_only");
    DoubleReal fdr_for_targets_smaller = getDoubleOption_("fdr_for_targets_smaller");
    bool target_decoy_available = false;
    bool ignore_bad_data = getFlag_("ignore_bad_data");
    bool prob_correct = getFlag_("prob_correct");
    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    IdXMLFile file;
    vector<ProteinIdentification> protein_ids;
    vector<PeptideIdentification> peptide_ids;
    file.load(inputfile_name, protein_ids, peptide_ids);
    vector<double> scores;
    vector<double> decoy;
    vector<double> target;
    vector<Int> charges;
    PosteriorErrorProbabilityModel PEP_model;
    PEP_model.setParameters(fit_algorithm);
    StringList search_engines = ListUtils::create<String>("XTandem,OMSSA,MASCOT,SpectraST,MyriMatch,SimTandem");
    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    if (split_charge)
    {
      // collect every distinct charge state that occurs among the hits
      for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it < peptide_ids.end(); ++it)
      {
        const vector<PeptideHit>& hits = it->getHits();
        for (vector<PeptideHit>::const_iterator hit = hits.begin(); hit < hits.end(); ++hit)
        {
          if (charges.end() == find(charges.begin(), charges.end(), hit->getCharge()))
          {
            charges.push_back(hit->getCharge());
          }
        }
      }
      if (charges.empty())
      {
        throw Exception::ElementNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, "no charges found!");
      }
    }

    // the first identification that has hits decides whether target/decoy information is used
    for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it < peptide_ids.end(); ++it)
    {
      if (!it->getHits().empty())
      {
        target_decoy_available = (it->getScoreType() == "q-value" &&  it->getHits()[0].getMetaValue("target_decoy") != DataValue::EMPTY);
        break;
      }
    }

    // 'charges' is only empty if split_charge is not set (see the check above);
    // in that case the iterator is never dereferenced and the loop below runs exactly once
    vector<Int>::iterator charge = charges.begin();

    // key: engine name, or "<engine><splitter><charge>" if split_charge is set
    // value: [0] all scores, [1] target scores, [2] decoy scores
    map<String, vector<vector<double> > > all_scores;
    char splitter = ','; //to split the engine from the charge state later on
    do
    {
      for (StringList::iterator engine = search_engines.begin(); engine < search_engines.end(); ++engine)
      {
        for (vector<ProteinIdentification>::iterator prot_iter = protein_ids.begin(); prot_iter < protein_ids.end(); ++prot_iter)
        {
          // accept the engine name as stored or in upper case
          String searchengine_toUpper =  prot_iter->getSearchEngine();
          searchengine_toUpper.toUpper();
          if (*engine == prot_iter->getSearchEngine() || *engine == searchengine_toUpper)
          {
            for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it < peptide_ids.end(); ++it)
            {
              if (prot_iter->getIdentifier().compare(it->getIdentifier()) == 0)
              {
                vector<PeptideHit> hits = it->getHits();
                if (top_hits_only)
                {
                  if (!hits.empty() && (!split_charge || hits[0].getCharge() == *charge))
                  {
                    scores.push_back(get_score_(*engine, hits[0]));
                    if (target_decoy_available)
                    {
                      // hits scoring below the threshold count as targets, all others as decoys
                      if (hits[0].getScore() < fdr_for_targets_smaller)
                      {
                        target.push_back(get_score_(*engine, hits[0]));
                      }
                      else
                      {
                        decoy.push_back(get_score_(*engine, hits[0]));
                      }
                    }
                  }
                }
                else
                {
                  for (std::vector<PeptideHit>::iterator  hit  = hits.begin(); hit < hits.end(); ++hit)
                  {
                    if (!split_charge || hit->getCharge() == *charge)
                    {
                      scores.push_back(get_score_(*engine, *hit));
                    }
                  }
                }
              }
            }
          }
        }
        // score sets with at most two values are not stored
        if (scores.size() > 2)
        {
          vector<vector<double> > tmp;
          tmp.push_back(scores);
          tmp.push_back(target);
          tmp.push_back(decoy);
          if (split_charge)
          {
            String engine_with_charge_state = *engine + String(splitter) + String(*charge);
            all_scores.insert(make_pair(engine_with_charge_state, tmp));
          }
          else
          {
            all_scores.insert(make_pair(*engine, tmp));
          }
        }

        scores.clear();
        target.clear();
        decoy.clear();
      }

      if (split_charge) ++charge;

    }
    while (charge < charges.end());

    if (all_scores.empty())
    {
      writeLog_("No data collected. Check whether search engine is supported.");
      if (!ignore_bad_data) return INPUT_FILE_EMPTY;
    }
    for (map<String, vector<vector<double> > >::iterator score_it = all_scores.begin(); score_it != all_scores.end(); ++score_it)
    {
      // recover engine name and (optional) charge state from the map key
      vector<String> engine_info;
      score_it->first.split(splitter, engine_info);
      String engine = engine_info[0];
      Int charge_state = -1;
      if (engine_info.size() == 2)
      {
        charge_state = engine_info[1].toInt();
      }
      if (split_charge)
      {
        // The suffix is set on a copy so that 'fit_algorithm' keeps the original
        // output name; modifying it in place would append one more "_charge_N"
        // on every iteration.
        Param charge_fit_algorithm = fit_algorithm;
        String output_name  = fit_algorithm.getValue("output_name");
        charge_fit_algorithm.setValue("output_name", output_name + "_charge_" + String(charge_state), "...", ListUtils::create<String>("advanced,output file"));
        PEP_model.setParameters(charge_fit_algorithm);
      }

      const bool return_value = PEP_model.fit(score_it->second[0]);
      if (!return_value) writeLog_("unable to fit data. Algorithm did not run through for the following search engine: " + engine);
      if (!return_value && !ignore_bad_data) return UNEXPECTED_RESULT;

      if (return_value)
      {
        //plot target_decoy
        if (target_decoy_available && score_it->second[0].size() > 0)
        {
          PEP_model.plotTargetDecoyEstimation(score_it->second[1], score_it->second[2]); //target, decoy
        }

        bool unable_to_fit_data = true;
        bool data_might_not_be_well_fit = true;
        for (vector<ProteinIdentification>::iterator prot_iter = protein_ids.begin(); prot_iter < protein_ids.end(); ++prot_iter)
        {
          String searchengine_toUpper =  prot_iter->getSearchEngine();
          searchengine_toUpper.toUpper();

          if (engine == prot_iter->getSearchEngine() || engine == searchengine_toUpper)
          {
            for (vector<PeptideIdentification>::iterator pep_it = peptide_ids.begin(); pep_it < peptide_ids.end(); ++pep_it)
            {
              if (prot_iter->getIdentifier().compare(pep_it->getIdentifier()) == 0)
              {
                // name of the meta value that keeps the score being replaced
                String score_type = pep_it->getScoreType() + "_score";
                vector<PeptideHit> hits = pep_it->getHits();
                for (std::vector<PeptideHit>::iterator  hit  = hits.begin(); hit < hits.end(); ++hit)
                {
                  if (!split_charge || hit->getCharge() == charge_state)
                  {
                    DoubleReal score;
                    hit->setMetaValue(score_type, hit->getScore());
                    score = PEP_model.computeProbability(get_score_(engine, *hit));
                    // stays true only if every computed probability is <= 0 or >= 1
                    if (score > 0 && score < 1) unable_to_fit_data = false;
                    // stays true only if no computed probability lies strictly between 0.2 and 0.8
                    if (score > 0.2 && score < 0.8) data_might_not_be_well_fit = false;
                    if (prob_correct)
                    {
                      hit->setScore(1 - score);
                    }
                    else
                    {
                      hit->setScore(score);
                    }
                  }
                }
                pep_it->setHits(hits);
              }
              // NOTE(review): the two calls below run for every peptide identification,
              // not only for those matching the identifier above, so later passes
              // (other charge state or engine) see the new score type when building
              // 'score_type'. The orientation also stays "lower is better" when
              // prob_correct stores 1 - score. Left unchanged here; confirm intent.
              pep_it->setScoreType("Posterior Error Probability");
              pep_it->setHigherScoreBetter(false);
            }
          }
        }
        if (unable_to_fit_data) writeLog_(String("unable to fit data for search engine: ") + engine);
        if (unable_to_fit_data && !ignore_bad_data) return UNEXPECTED_RESULT;

        if (data_might_not_be_well_fit) writeLog_(String("data might not be well fitted for search engine: ") + engine);
      }
    }
    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    file.store(outputfile_name, protein_ids, peptide_ids);
    return EXECUTION_OK;
  }