// Shared fixtures for the sections below: 'ptr' holds the PILISScoring
// instance under test, 'nullPointer' is the value the freshly allocated
// pointer is compared against.
PILISScoring* ptr = 0;
PILISScoring* nullPointer = 0;
// idXML input file, resolved through the test data path macro.
String filename(OPENMS_GET_TEST_DATA_PATH("IDFilter_test2.idXML"));

// Default constructor: the allocation must yield a pointer different from null.
START_SECTION(PILISScoring())
  ptr = new PILISScoring();
  TEST_NOT_EQUAL(ptr, nullPointer)
END_SECTION

// Destructor: releases the instance created in the previous section.
START_SECTION(~PILISScoring())
  delete ptr;
END_SECTION

// Fresh instance shared by all following sections.
ptr = new PILISScoring();

// Copy constructor: the copy must report the same parameters as the source.
START_SECTION(PILISScoring(const PILISScoring& source))
  PILISScoring copy(*ptr);
  TEST_EQUAL(copy.getParameters(), ptr->getParameters())
END_SECTION

// Assignment operator: the assigned object must report the same parameters as the source.
START_SECTION(PILISScoring& operator = (const PILISScoring& source))
  PILISScoring copy;
  copy = *ptr;
  TEST_EQUAL(copy.getParameters(), ptr->getParameters())
END_SECTION

// getScores(): loads identifications from the idXML file and passes them to the scorer.
// NOTE(review): the assertions and the closing END_SECTION of this section are
// not visible in this excerpt - confirm against the full test file.
START_SECTION(void getScores(std::vector<PeptideIdentification>& ids))
  vector<PeptideIdentification> ids;
  vector<ProteinIdentification> prot_ids;
  String document_id;
  IdXMLFile().load(filename, prot_ids, ids, document_id);
  ptr->getScores(ids);
/**
  @brief Tool entry point: identifies the MS2 spectra of an mzML file with PILIS and writes the result as idXML.

  Steps performed, in order:
  - load the spectra from the file given by option "in"
  - read the PILIS model from "model_file" and apply the "model:*" options
  - query the suffix array built from "peptide_db_file" for candidate
    peptides of every precursor, assuming charge states 1 to 3
  - run PILISIdentification on each MS2 spectrum (spectra with MS level 0
    are treated as MS2)
  - unless "scoring:do_not_use_evalue_scoring" is set, rescore all
    identifications with PILISScoring
  - keep at most "max_candidates" hits per spectrum and store everything
    in the file given by option "out"

  Spectra without precursor information contribute no candidate masses; an
  MS2 spectrum without precursor yields an identification without hits so
  that there is still exactly one identification per MS2 spectrum.

  @return EXECUTION_OK after the output file has been written
*/
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  // input/output files
  String in(getStringOption_("in"));
  String out(getStringOption_("out"));

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------

  RichPeakMap exp;
  MzMLFile f;
  f.setLogType(log_type_);
  f.load(in, exp);

  writeDebug_("Data set contains " + String(exp.size()) + " spectra", 1);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------

  writeDebug_("Reading model file", 2);

  // create model and set the given options
  // The model has automatic storage so it is released on every exit path,
  // including exceptions. It is declared before the PILISIdentification
  // object that points to it and therefore outlives that object.
  PILISModel model;
  model.readFromFile(getStringOption_("model_file"));
  Param model_param(model.getParameters());
  model_param.setValue("upper_mz", getDoubleOption_("model:upper_mz"));
  model_param.setValue("lower_mz", getDoubleOption_("model:lower_mz"));
  model_param.setValue("charge_directed_threshold", getDoubleOption_("model:charge_directed_threshold"));
  model_param.setValue("charge_remote_threshold", getDoubleOption_("model:charge_remote_threshold"));
  //model_param.setValue("min_main_ion_intensity", getDoubleOption_("model:min_main_ion_intensity"));
  //model_param.setValue("min_loss_ion_intensity", getDoubleOption_("model:min_loss_ion_intensity"));
  model_param.setValue("min_y_ion_intensity", getDoubleOption_("model:min_y_ion_intensity"));
  model_param.setValue("min_b_ion_intensity", getDoubleOption_("model:min_b_ion_intensity"));
  model_param.setValue("min_a_ion_intensity", getDoubleOption_("model:min_a_ion_intensity"));
  model_param.setValue("min_y_loss_intensity", getDoubleOption_("model:min_y_loss_intensity"));
  model_param.setValue("min_b_loss_intensity", getDoubleOption_("model:min_b_loss_intensity"));
  model_param.setValue("charge_loss_factor", getDoubleOption_("model:charge_loss_factor"));
  model_param.setValue("visible_model_depth", getIntOption_("model:visible_model_depth"));
  model_param.setValue("model_depth", getIntOption_("model:model_depth"));
  model_param.setValue("fixed_modifications", getStringOption_("fixed_modifications"));
  model.setParameters(model_param);

  writeDebug_("Reading sequence db", 2);

  // candidate list as filled by SuffixArrayPeptideFinder::getCandidates();
  // the peptide sequence is read from 'first.second' below
  typedef vector<pair<pair<String, String>, String> > CandidateList;

  UInt max_charge(3), min_charge(1); // TODO

  // candidates keyed by the precursor weight they were requested for
  map<double, CandidateList> sorted_candidates;
  {
    // create sequence db; the enclosing scope releases the finder and the
    // raw candidate vector as soon as the lookup table has been filled
    SuffixArrayPeptideFinder sapf(getStringOption_("peptide_db_file"), "trypticCompressed");
    sapf.setTolerance(getDoubleOption_("precursor_mass_tolerance"));
    sapf.setNumberOfModifications(0);
    sapf.setUseTags(false);

    //exp.resize(50); // TODO

    vector<double> pre_weights;
    for (RichPeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      // spectra without precursor information (e.g. survey scans) have no
      // precursor m/z to query; indexing element 0 would be undefined
      if (it->getPrecursors().empty())
      {
        continue;
      }
      const double precursor_mz(it->getPrecursors()[0].getMZ());
      for (UInt z = min_charge; z <= max_charge; ++z)
      {
        pre_weights.push_back((precursor_mz * static_cast<double>(z)) - static_cast<double>(z));
      }
    }

    sort(pre_weights.begin(), pre_weights.end());

    cerr << "Getting candidates from SA...";
    vector<CandidateList> candidates;
    sapf.getCandidates(candidates, pre_weights);
    cerr << "done" << endl;

    // candidates[i] is stored under pre_weights[i]; both sizes bound the
    // loop so that neither container is indexed out of range
    for (Size i = 0; i != candidates.size() && i != pre_weights.size(); ++i)
    {
      sorted_candidates[pre_weights[i]] = candidates[i];
    }
  }

  // create ProteinIdentification and set the options
  PILISIdentification PILIS_id;

  PILIS_id.setModel(&model);

  Param id_param(PILIS_id.getParameters());
  id_param.setValue("precursor_mass_tolerance", getDoubleOption_("precursor_mass_tolerance"));
  id_param.setValue("max_candidates", getIntOption_("max_pre_candidates"));
  // disable evalue scoring, this is done separately to allow for a single id per spectrum
  id_param.setValue("use_evalue_scoring", 0);
  id_param.setValue("fixed_modifications", getStringOption_("fixed_modifications"));
  PILIS_id.setParameters(id_param);

  vector<PeptideIdentification> ids;

  // perform the ProteinIdentification of the given spectra
  UInt no(0);
  for (RichPeakMap::Iterator it = exp.begin(); it != exp.end(); ++it, ++no)
  {
    if (it->getMSLevel() == 0)
    {
      writeLog_("Warning: MSLevel is 0, assuming MSLevel 2");
      it->setMSLevel(2);
    }

    if (it->getMSLevel() != 2)
    {
      continue;
    }

    writeDebug_(String(no) + "/" + String(exp.size()), 1);
    PeptideIdentification id;

    if (it->getPrecursors().empty())
    {
      // No precursor m/z means no candidates can be looked up. An
      // identification without hits is still stored because the output
      // section assigns ids to MS2 spectra by position.
      writeLog_("Warning: MS2 spectrum without precursor information, no identification performed");
      id.setMetaValue("RT", it->getRT());
      ids.push_back(id);
      continue;
    }

    const double precursor_mz = it->getPrecursors()[0].getMZ();

    // candidate sequence -> charge state it was found for
    map<String, UInt> cand;

    for (UInt z = min_charge; z <= max_charge; ++z)
    {
      // same expression as used when the weights were collected, so the
      // exact floating point key can be looked up again
      const double pre_weight = (precursor_mz * static_cast<double>(z)) - static_cast<double>(z);

      // find() instead of operator[]: a miss must not insert an empty entry
      map<double, CandidateList>::const_iterator found = sorted_candidates.find(pre_weight);
      if (found == sorted_candidates.end())
      {
        continue;
      }

      for (CandidateList::const_iterator cit = found->second.begin(); cit != found->second.end(); ++cit)
      {
        const String& seq = cit->first.second;
        // sequences longer than 39 residues are not considered
        if (seq.size() > 39)
        {
          continue;
        }

        // count internal K/R residues that are not followed by P
        UInt num_cleavages_sites(0);
        for (Size k = 0; k + 1 < seq.size(); ++k)
        {
          if ((seq[k] == 'K' || seq[k] == 'R') && seq[k + 1] != 'P')
          {
            ++num_cleavages_sites;
          }
        }

        // allow at most one such site per candidate
        if (num_cleavages_sites > 1)
        {
          continue;
        }

        cand[seq] = z;
      }
    }

    cerr << "#cand=" << cand.size() << endl;
    PILIS_id.getIdentification(cand, id, *it);

    id.setMetaValue("RT", it->getRT());
    id.setMetaValue("MZ", precursor_mz);

    ids.push_back(id);

    if (!id.getHits().empty())
    {
      cerr << precursor_mz << " " << AASequence(id.getHits().begin()->getSequence()).getAverageWeight() << endl;
      writeDebug_(id.getHits().begin()->getSequence().toString() + " (z=" + id.getHits().begin()->getCharge() + "), score=" + String(id.getHits().begin()->getScore()), 10);
    }
  }

  // perform the PILIS scoring to the spectra
  if (!getFlag_("scoring:do_not_use_evalue_scoring"))
  {
    PILISScoring scoring;
    Param scoring_param(scoring.getParameters());
    scoring_param.setValue("use_local_scoring", (int)getFlag_("scoring:use_local_scoring"));
    scoring_param.setValue("survival_function_bin_size", getIntOption_("scoring:survival_function_bin_size"));
    scoring_param.setValue("global_linear_fitting_threshold", getDoubleOption_("scoring:global_linear_fitting_threshold"));
    scoring_param.setValue("local_linear_fitting_threshold", getDoubleOption_("scoring:local_linear_fitting_threshold"));
    scoring.setParameters(scoring_param);

    scoring.getScores(ids);
  }

  // write the result to the IdentificationData structure for the storing
  // (only the first 'max_candidates' hits of every identification are kept)
  UInt max_candidates = getIntOption_("max_candidates");
  for (Size i = 0; i != ids.size(); ++i)
  {
    if (ids[i].getHits().size() > max_candidates)
    {
      vector<PeptideHit> hits = ids[i].getHits();
      hits.resize(max_candidates);
      ids[i].setHits(hits);
    }
  }

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------

  // DateTime::now() returns the current time as a new object; its result
  // has to be stored, calling it on an instance and dropping the return
  // value leaves that instance unchanged
  DateTime now = DateTime::now();
  String date_string;
  //now.get(date_string); // @todo Fix it (Andreas)
  String identifier("PILIS_" + date_string);

  // ids holds one entry per MS2 spectrum, in spectrum order
  Size id_index(0);
  for (RichPeakMap::ConstIterator it = exp.begin(); it != exp.end() && id_index < ids.size(); ++it)
  {
    if (it->getMSLevel() != 2)
    {
      continue;
    }

    PeptideIdentification& pep_id = ids[id_index++];
    pep_id.setMetaValue("RT", it->getRT());
    if (!it->getPrecursors().empty())
    {
      pep_id.setMetaValue("MZ", it->getPrecursors()[0].getMZ());
    }
    pep_id.setIdentifier(identifier);
    pep_id.setHigherScoreBetter(false);
  }

  // search parameters
  ProteinIdentification::SearchParameters search_parameters;
  search_parameters.db = getStringOption_("peptide_db_file");
  search_parameters.db_version = "";
  search_parameters.taxonomy = "";
  //search_parameters.charges = getStringOption_("charges");
  search_parameters.mass_type = ProteinIdentification::MONOISOTOPIC;
  vector<String> fixed_mods;
  getStringOption_("fixed_modifications").split(',', fixed_mods);
  search_parameters.fixed_modifications = fixed_mods;
  search_parameters.enzyme = ProteinIdentification::TRYPSIN;
  search_parameters.missed_cleavages = 1;
  search_parameters.peak_mass_tolerance = getDoubleOption_("peak_mass_tolerance");
  search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance");

  ProteinIdentification protein_identification;
  protein_identification.setDateTime(now);
  protein_identification.setSearchEngine("PILIS");
  protein_identification.setSearchEngineVersion("beta");
  protein_identification.setSearchParameters(search_parameters);
  protein_identification.setIdentifier(identifier);

  vector<ProteinIdentification> protein_identifications;
  protein_identifications.push_back(protein_identification);
  IdXMLFile().store(out, protein_identifications, ids);

  return EXECUTION_OK;
}