// public methods void TransitionPQPReader::convertTargetedExperimentToPQP(const char* filename, OpenMS::TargetedExperiment& targeted_exp) { if (targeted_exp.containsInvalidReferences()) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Your input file contains invalid references, cannot process file."); } writePQPOutput_(filename, targeted_exp); }
// Rebuilds PeptideRTMap_ (peptide id -> retention time) from the peptides of
// the given experiment, using the first value of CV term "MS:1000896" of the
// first retention time entry of each peptide.
//
// @param transition_exp        Experiment whose peptides are scanned.
// @param rt_extraction_window  If >= 0, a peptide without such a retention
//                              time is treated as an error; otherwise the
//                              peptide is silently left out of the map.
// @throws Exception::IllegalArgument for a peptide without the retention time
//         term when rt_extraction_window >= 0.
void ChromatogramExtractor::populate_PeptideRTMap(OpenMS::TargetedExperiment& transition_exp, double rt_extraction_window)
{
  // Start from an empty intermediate map
  PeptideRTMap_.clear();

  for (Size pep_idx = 0; pep_idx < transition_exp.getPeptides().size(); ++pep_idx)
  {
    const TargetedExperiment::Peptide& current = transition_exp.getPeptides()[pep_idx];

    const bool has_normalized_rt = !current.rts.empty() && !current.rts[0].getCVTerms()["MS:1000896"].empty();
    if (has_normalized_rt)
    {
      PeptideRTMap_[current.id] = current.rts[0].getCVTerms()["MS:1000896"][0].getValue().toString().toDouble();
      continue;
    }

    // No retention time available: this only matters if the RT limit
    // feature was actually requested.
    if (rt_extraction_window >= 0)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Error: Peptide " + current.id + " does not have normalized retention times (term 1000896) which are necessary to perform an RT-limited extraction");
    }
  }
}
// Copies all peptides and proteins of targeted_exp into transition_exp_used
// and adds those transitions whose precursor m/z lies strictly inside
// (lower, upper) and is at least min_upper_edge_dist away from upper.
//
// @param targeted_exp         Source experiment.
// @param transition_exp_used  Receives peptides, proteins and the selected transitions.
// @param min_upper_edge_dist  Minimal required distance |upper - precursor m/z|.
// @param lower                Exclusive lower precursor m/z bound.
// @param upper                Exclusive upper precursor m/z bound.
void OpenSwathHelper::selectSwathTransitions(const OpenMS::TargetedExperiment& targeted_exp, OpenMS::TargetedExperiment& transition_exp_used, double min_upper_edge_dist, double lower, double upper)
{
  transition_exp_used.setPeptides(targeted_exp.getPeptides());
  transition_exp_used.setProteins(targeted_exp.getProteins());

  for (Size idx = 0; idx < targeted_exp.getTransitions().size(); ++idx)
  {
    const ReactionMonitoringTransition& candidate = targeted_exp.getTransitions()[idx];

    const bool inside_window = (lower < candidate.getPrecursorMZ()) && (candidate.getPrecursorMZ() < upper);
    if (!inside_window)
    {
      continue;
    }

    const bool far_from_upper_edge = std::fabs(upper - candidate.getPrecursorMZ()) >= min_upper_edge_dist;
    if (far_from_upper_edge)
    {
      transition_exp_used.addTransition(candidate);
    }
  }
}
void OpenSwathDataAccessHelper::convertTargetedExp(const OpenMS::TargetedExperiment & transition_exp_, OpenSwath::LightTargetedExperiment & transition_exp) { //copy proteins for (Size i = 0; i < transition_exp_.getProteins().size(); i++) { OpenSwath::LightProtein p; p.id = transition_exp_.getProteins()[i].id; transition_exp.proteins.push_back(p); } //copy peptides for (Size i = 0; i < transition_exp_.getPeptides().size(); i++) { OpenSwath::LightPeptide p; OpenSwathDataAccessHelper::convertTargetedPeptide(transition_exp_.getPeptides()[i], p); transition_exp.peptides.push_back(p); } //mapping of transitions for (Size i = 0; i < transition_exp_.getTransitions().size(); i++) { OpenSwath::LightTransition t; t.transition_name = transition_exp_.getTransitions()[i].getNativeID(); t.product_mz = transition_exp_.getTransitions()[i].getProductMZ(); t.precursor_mz = transition_exp_.getTransitions()[i].getPrecursorMZ(); t.library_intensity = transition_exp_.getTransitions()[i].getLibraryIntensity(); t.peptide_ref = transition_exp_.getTransitions()[i].getPeptideRef(); t.charge = transition_exp_.getTransitions()[i].getProduct().getChargeState(); t.decoy = false; // legacy #if 1 if (transition_exp_.getTransitions()[i].getCVTerms().has("decoy") && transition_exp_.getTransitions()[i].getCVTerms()["decoy"][0].getValue().toString() == "1" ) { t.decoy = true; } else if (transition_exp_.getTransitions()[i].getCVTerms().has("MS:1002007")) // target SRM transition { t.decoy = false; } else if (transition_exp_.getTransitions()[i].getCVTerms().has("MS:1002008")) // decoy SRM transition { t.decoy = true; } else if (transition_exp_.getTransitions()[i].getCVTerms().has("MS:1002007") && transition_exp_.getTransitions()[i].getCVTerms().has("MS:1002008")) // both == illegal { throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Transition " + t.transition_name + " cannot be target and decoy at the same time."); } else #endif if 
(transition_exp_.getTransitions()[i].getDecoyTransitionType() == ReactionMonitoringTransition::UNKNOWN || transition_exp_.getTransitions()[i].getDecoyTransitionType() == ReactionMonitoringTransition::TARGET) { // assume its target t.decoy = false; } else if (transition_exp_.getTransitions()[i].getDecoyTransitionType() == ReactionMonitoringTransition::DECOY) { t.decoy = true; } transition_exp.transitions.push_back(t); } }
void MRMDecoy::generateDecoys(OpenMS::TargetedExperiment& exp, OpenMS::TargetedExperiment& dec, String method, String decoy_tag, double identity_threshold, int max_attempts, double mz_threshold, double mz_shift, bool exclude_similar, double similarity_threshold, bool remove_CNterminal_mods, double precursor_mass_shift, std::vector<String> fragment_types, std::vector<size_t> fragment_charges, bool enable_specific_losses, bool enable_unspecific_losses, bool remove_unannotated, int round_decPow) { MRMIonSeries mrmis; MRMDecoy::PeptideVectorType peptides, decoy_peptides; MRMDecoy::ProteinVectorType proteins, decoy_proteins; MRMDecoy::TransitionVectorType decoy_transitions; for (Size i = 0; i < exp.getProteins().size(); i++) { OpenMS::TargetedExperiment::Protein protein = exp.getProteins()[i]; protein.id = decoy_tag + protein.id; proteins.push_back(protein); } std::vector<String> exclusion_peptides; // Go through all peptides and apply the decoy method to the sequence // (pseudo-reverse, reverse or shuffle). Then set the peptides and proteins of the decoy // experiment. 
for (Size pep_idx = 0; pep_idx < exp.getPeptides().size(); ++pep_idx) { OpenMS::TargetedExperiment::Peptide peptide = exp.getPeptides()[pep_idx]; // continue if the peptide has C/N terminal modifications and we should exclude them if (remove_CNterminal_mods && MRMDecoy::has_CNterminal_mods(peptide)) {continue; } peptide.id = decoy_tag + peptide.id; OpenMS::String original_sequence = peptide.sequence; if (!peptide.getPeptideGroupLabel().empty()) { peptide.setPeptideGroupLabel(decoy_tag + peptide.getPeptideGroupLabel()); } if (method == "pseudo-reverse") { peptide = MRMDecoy::pseudoreversePeptide(peptide); } else if (method == "reverse") { peptide = MRMDecoy::reversePeptide(peptide); } else if (method == "shuffle") { peptide = MRMDecoy::shufflePeptide(peptide, identity_threshold, -1, max_attempts); } for (Size prot_idx = 0; prot_idx < peptide.protein_refs.size(); ++prot_idx) { peptide.protein_refs[prot_idx] = decoy_tag + peptide.protein_refs[prot_idx]; } if (MRMDecoy::AASequenceIdentity(original_sequence, peptide.sequence) > identity_threshold) { if (!exclude_similar) { std::cout << "Target sequence: " << original_sequence << " Decoy sequence: " << peptide.sequence << " Sequence identity: " << MRMDecoy::AASequenceIdentity(original_sequence, peptide.sequence) << " Identity threshold: " << identity_threshold << std::endl; throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "AA Sequences are too similar. Either decrease identity_threshold and increase max_attempts for the shuffle method or set flag exclude_similar."); } else { exclusion_peptides.push_back(peptide.id); } } peptides.push_back(peptide); } dec.setPeptides(peptides); // temporary set peptides, overwrite later again! 
// hash of the peptide reference containing all transitions MRMDecoy::PeptideTransitionMapType peptide_trans_map; for (Size i = 0; i < exp.getTransitions().size(); i++) { peptide_trans_map[exp.getTransitions()[i].getPeptideRef()].push_back(&exp.getTransitions()[i]); } Size progress = 0; startProgress(0, exp.getTransitions().size(), "Creating decoys"); for (MRMDecoy::PeptideTransitionMapType::iterator pep_it = peptide_trans_map.begin(); pep_it != peptide_trans_map.end(); ++pep_it) { String peptide_ref = pep_it->first; String decoy_peptide_ref = decoy_tag + pep_it->first; // see above, the decoy peptide id is computed deterministically from the target id const TargetedExperiment::Peptide target_peptide = exp.getPeptideByRef(peptide_ref); // continue if the peptide has C/N terminal modifications and we should exclude them if (remove_CNterminal_mods && MRMDecoy::has_CNterminal_mods(target_peptide)) {continue;} const TargetedExperiment::Peptide decoy_peptide = dec.getPeptideByRef(decoy_peptide_ref); OpenMS::AASequence target_peptide_sequence = TargetedExperimentHelper::getAASequence(target_peptide); OpenMS::AASequence decoy_peptide_sequence = TargetedExperimentHelper::getAASequence(decoy_peptide); int decoy_charge = 1; int target_charge = 1; if (decoy_peptide.hasCharge()) {decoy_charge = decoy_peptide.getChargeState();} if (target_peptide.hasCharge()) {target_charge = target_peptide.getChargeState();} MRMIonSeries::IonSeries decoy_ionseries = mrmis.getIonSeries(decoy_peptide_sequence, decoy_charge, fragment_types, fragment_charges, enable_specific_losses, enable_unspecific_losses, round_decPow); MRMIonSeries::IonSeries target_ionseries = mrmis.getIonSeries(target_peptide_sequence, target_charge, fragment_types, fragment_charges, enable_specific_losses, enable_unspecific_losses, round_decPow); for (Size i = 0; i < pep_it->second.size(); i++) { setProgress(++progress); const ReactionMonitoringTransition tr = *(pep_it->second[i]); if (!tr.isDetectingTransition() || 
tr.getDecoyTransitionType() == ReactionMonitoringTransition::DECOY) { continue; } ReactionMonitoringTransition decoy_tr = tr; // copy the target transition decoy_tr.setNativeID(decoy_tag + tr.getNativeID()); decoy_tr.setDecoyTransitionType(ReactionMonitoringTransition::DECOY); decoy_tr.setPrecursorMZ(tr.getPrecursorMZ() + precursor_mass_shift); // fix for TOPPView: Duplicate precursor MZ is not displayed. // determine the current annotation for the target ion and then select // the appropriate decoy ion for this target transition std::pair<String, double> targetion = mrmis.annotateIon(target_ionseries, tr.getProductMZ(), mz_threshold); std::pair<String, double> decoyion = mrmis.getIon(decoy_ionseries, targetion.first); if (method == "shift") { decoy_tr.setProductMZ(decoyion.second + mz_shift); } else { decoy_tr.setProductMZ(decoyion.second); } decoy_tr.setPeptideRef(decoy_tag + tr.getPeptideRef()); if (decoyion.second > 0) { if (similarity_threshold >= 0) { if (std::fabs(tr.getProductMZ() - decoy_tr.getProductMZ()) < similarity_threshold) { if (!exclude_similar) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Fragment ions are too similar. Either decrease similarity_threshold or set flag exclude_similar."); } else { exclusion_peptides.push_back(decoy_tr.getPeptideRef()); } } } decoy_transitions.push_back(decoy_tr); } else { if (remove_unannotated) { exclusion_peptides.push_back(decoy_tr.getPeptideRef()); } else { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Decoy fragment ion for target fragment ion " + String(targetion.first) + " of peptide " + target_peptide_sequence.toString() + " with precursor charge " + String(target_peptide.getChargeState()) + " could not be mapped. Please check whether it is a valid ion and enable losses or removal of terminal modifications if necessary. 
Skipping of unannotated target assays is available as last resort."); } } } // end loop over transitions } // end loop over peptides endProgress(); MRMDecoy::TransitionVectorType filtered_decoy_transitions; for (MRMDecoy::TransitionVectorType::iterator tr_it = decoy_transitions.begin(); tr_it != decoy_transitions.end(); ++tr_it) { if (std::find(exclusion_peptides.begin(), exclusion_peptides.end(), tr_it->getPeptideRef()) == exclusion_peptides.end()) { filtered_decoy_transitions.push_back(*tr_it); } } dec.setTransitions(filtered_decoy_transitions); std::vector<String> protein_ids; for (Size i = 0; i < peptides.size(); ++i) { TargetedExperiment::Peptide peptide = peptides[i]; // Check if peptide has any transitions left if (std::find(exclusion_peptides.begin(), exclusion_peptides.end(), peptide.id) == exclusion_peptides.end()) { decoy_peptides.push_back(peptide); for (Size j = 0; j < peptide.protein_refs.size(); ++j) { protein_ids.push_back(peptide.protein_refs[j]); } } else { LOG_DEBUG << "[peptide] Skipping " << peptide.id << std::endl; } } for (Size i = 0; i < proteins.size(); ++i) { OpenMS::TargetedExperiment::Protein protein = proteins[i]; // Check if protein has any peptides left if (find(protein_ids.begin(), protein_ids.end(), protein.id) != protein_ids.end()) { decoy_proteins.push_back(protein); } else { LOG_DEBUG << "[protein] Skipping " << protein.id << std::endl; } } dec.setPeptides(decoy_peptides); dec.setProteins(decoy_proteins); }
void ChromatogramExtractor::prepare_coordinates(std::vector< OpenSwath::ChromatogramPtr > & output_chromatograms, std::vector< ExtractionCoordinates > & coordinates, OpenMS::TargetedExperiment & transition_exp_used, bool enforce_presence_rt, const bool ms1) const { // hash of the peptide reference containing all transitions typedef std::map<String, std::vector<const ReactionMonitoringTransition*> > PeptideTransitionMapType; PeptideTransitionMapType peptide_trans_map; for (Size i = 0; i < transition_exp_used.getTransitions().size(); i++) { peptide_trans_map[transition_exp_used.getTransitions()[i].getPeptideRef()].push_back(&transition_exp_used.getTransitions()[i]); } // Determine iteration size (nr peptides or nr transitions) Size itersize; if (ms1) {itersize = transition_exp_used.getPeptides().size();} else {itersize = transition_exp_used.getTransitions().size();} for (Size i = 0; i < itersize; i++) { OpenSwath::ChromatogramPtr s(new OpenSwath::Chromatogram); output_chromatograms.push_back(s); ChromatogramExtractor::ExtractionCoordinates coord; TargetedExperiment::Peptide pep; OpenMS::ReactionMonitoringTransition transition; if (ms1) { pep = transition_exp_used.getPeptides()[i]; transition = (*peptide_trans_map[pep.id][0]); coord.mz = transition.getPrecursorMZ(); coord.id = pep.id; } else { transition = transition_exp_used.getTransitions()[i]; pep = transition_exp_used.getPeptideByRef(transition.getPeptideRef()); coord.mz = transition.getProductMZ(); coord.id = transition.getNativeID(); } if (pep.rts.empty() || pep.rts[0].getCVTerms()["MS:1000896"].empty()) { // we dont have retention times -> this is only a problem if we actually // wanted to use the RT limit feature. 
if (enforce_presence_rt) { throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "Error: Peptide " + pep.id + " does not have normalized retention times (term 1000896) which are necessary to perform an RT-limited extraction"); } coord.rt = -1; } else { coord.rt = pep.rts[0].getCVTerms()["MS:1000896"][0].getValue().toString().toDouble(); // coord.rt = trafo.apply(coord.rt); // apply RT transformation if necessary } coordinates.push_back(coord); } // sort result std::sort(coordinates.begin(), coordinates.end(), ChromatogramExtractor::ExtractionCoordinates::SortExtractionCoordinatesByMZ); }
void ChromatogramExtractor::return_chromatogram(std::vector< OpenSwath::ChromatogramPtr > & chromatograms, std::vector< ChromatogramExtractor::ExtractionCoordinates > & coordinates, OpenMS::TargetedExperiment & transition_exp_used, SpectrumSettings settings, std::vector<OpenMS::MSChromatogram<> > & output_chromatograms, bool ms1) const { typedef std::map<String, const ReactionMonitoringTransition* > TransitionMapType; TransitionMapType trans_map; for (Size i = 0; i < transition_exp_used.getTransitions().size(); i++) { trans_map[transition_exp_used.getTransitions()[i].getNativeID()] = &transition_exp_used.getTransitions()[i]; } for (Size i = 0; i < chromatograms.size(); i++) { const OpenSwath::ChromatogramPtr & chromptr = chromatograms[i]; const ChromatogramExtractor::ExtractionCoordinates & coord = coordinates[i]; TargetedExperiment::Peptide pep; OpenMS::ReactionMonitoringTransition transition; OpenMS::MSChromatogram<> chrom; // copy data OpenSwathDataAccessHelper::convertToOpenMSChromatogram(chrom, chromptr); chrom.setNativeID(coord.id); // Create precursor and set // 1) the target m/z // 2) the isolation window (upper/lower) // 3) the peptide sequence Precursor prec; if (ms1) { pep = transition_exp_used.getPeptideByRef(coord.id); prec.setMZ(coord.mz); chrom.setChromatogramType(ChromatogramSettings::BASEPEAK_CHROMATOGRAM); } else { transition = (*trans_map[coord.id]); pep = transition_exp_used.getPeptideByRef(transition.getPeptideRef()); prec.setMZ(transition.getPrecursorMZ()); if (settings.getPrecursors().size() > 0) { prec.setIsolationWindowLowerOffset(settings.getPrecursors()[0].getIsolationWindowLowerOffset()); prec.setIsolationWindowUpperOffset(settings.getPrecursors()[0].getIsolationWindowUpperOffset()); } // Create product and set its m/z Product prod; prod.setMZ(transition.getProductMZ()); chrom.setProduct(prod); chrom.setChromatogramType(ChromatogramSettings::SELECTED_REACTION_MONITORING_CHROMATOGRAM); } prec.setMetaValue("peptide_sequence", 
pep.sequence); chrom.setPrecursor(prec); // Set the rest of the meta-data chrom.setInstrumentSettings(settings.getInstrumentSettings()); chrom.setAcquisitionInfo(settings.getAcquisitionInfo()); chrom.setSourceFile(settings.getSourceFile()); for (Size i = 0; i < settings.getDataProcessing().size(); ++i) { DataProcessing dp = settings.getDataProcessing()[i]; dp.setMetaValue("performed_on_spectra", "true"); chrom.getDataProcessing().push_back(dp); } output_chromatograms.push_back(chrom); } }
void TransitionPQPReader::writePQPOutput_(const char* filename, OpenMS::TargetedExperiment& targeted_exp) { sqlite3 *db; char *zErrMsg = 0; int rc; // delete file if present remove(filename); // Open database rc = sqlite3_open(filename, &db); if ( rc ) { fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db)); } // Create SQL structure const char* create_sql = // protein table // OpenSWATH proteomics workflows "CREATE TABLE PROTEIN(" \ "ID INT PRIMARY KEY NOT NULL," \ "PROTEIN_ACCESSION TEXT NOT NULL," \ "DECOY INT NULL);" \ // peptide_protein_mapping table // OpenSWATH proteomics workflows "CREATE TABLE PEPTIDE_PROTEIN_MAPPING(" \ "PEPTIDE_ID INT NOT NULL," \ "PROTEIN_ID INT NOT NULL);" \ // peptide table // OpenSWATH proteomics workflows "CREATE TABLE PEPTIDE(" \ "ID INT PRIMARY KEY NOT NULL," \ "UNMODIFIED_SEQUENCE TEXT NOT NULL," \ "MODIFIED_SEQUENCE TEXT NOT NULL," \ "DECOY INT NOT NULL);" \ // precursor_peptide_mapping table // OpenSWATH proteomics workflows "CREATE TABLE PRECURSOR_PEPTIDE_MAPPING(" \ "PRECURSOR_ID INT NOT NULL," \ "PEPTIDE_ID INT NOT NULL);" \ // compound table // OpenSWATH metabolomics workflows "CREATE TABLE COMPOUND(" \ "ID INT PRIMARY KEY NOT NULL," \ "COMPOUND_NAME TEXT NOT NULL," \ "SUM_FORMULA TEXT NOT NULL," \ "SMILES TEXT NOT NULL," \ "DECOY INT NOT NULL);" \ // precursor_compound_mapping table // OpenSWATH metabolomics workflows "CREATE TABLE PRECURSOR_COMPOUND_MAPPING(" \ "PRECURSOR_ID INT NOT NULL," \ "COMPOUND_ID INT NOT NULL);" \ // precursor table "CREATE TABLE PRECURSOR(" \ "ID INT PRIMARY KEY NOT NULL," \ "TRAML_ID TEXT NULL," \ "GROUP_LABEL TEXT NULL," \ "PRECURSOR_MZ REAL NOT NULL," \ "CHARGE INT NULL," \ "LIBRARY_INTENSITY REAL NULL," \ "LIBRARY_RT REAL NULL," \ "DECOY INT NOT NULL);" \ // transition_precursor_mapping table "CREATE TABLE TRANSITION_PRECURSOR_MAPPING(" \ "TRANSITION_ID INT NOT NULL," \ "PRECURSOR_ID INT NOT NULL);" \ // transition_peptide_mapping table // IPF proteomics workflows "CREATE TABLE 
TRANSITION_PEPTIDE_MAPPING(" \ "TRANSITION_ID INT NOT NULL," \ "PEPTIDE_ID INT NOT NULL);" \ // transition table "CREATE TABLE TRANSITION(" \ "ID INT PRIMARY KEY NOT NULL," \ "TRAML_ID TEXT NULL," \ "PRODUCT_MZ REAL NOT NULL," \ "CHARGE INT NULL," \ "TYPE CHAR(1) NULL," \ "ORDINAL INT NULL," \ "DETECTING INT NOT NULL," \ "IDENTIFYING INT NOT NULL," \ "QUANTIFYING INT NOT NULL," \ "LIBRARY_INTENSITY REAL NULL," \ "DECOY INT NOT NULL);"; // Execute SQL create statement rc = sqlite3_exec(db, create_sql, callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Prepare insert statements // Index maps std::vector<std::string> group_set, peptide_set, compound_set, protein_set; std::map<int,double> precursor_mz_map; std::map<int,bool> precursor_decoy_map; std::stringstream insert_transition_sql, insert_transition_peptide_mapping_sql, insert_transition_precursor_mapping_sql; insert_transition_sql.precision(11); // OpenSWATH: Loop through TargetedExperiment to generate index maps for peptides Size progress = 0; startProgress(0, targeted_exp.getPeptides().size(), "Convert peptides"); for (Size i = 0; i < targeted_exp.getPeptides().size(); i++) { setProgress(progress++); OpenMS::TargetedExperiment::Peptide peptide = targeted_exp.getPeptides()[i]; std::string peptide_sequence = TargetedExperimentHelper::getAASequence(peptide).toString(); peptide_set.push_back(peptide_sequence); group_set.push_back(peptide.id); } endProgress(); // OpenSWATH: Loop through TargetedExperiment to generate index maps for compounds progress = 0; startProgress(0, targeted_exp.getCompounds().size(), "Convert compounds"); for (Size i = 0; i < targeted_exp.getCompounds().size(); i++) { setProgress(progress++); OpenMS::TargetedExperiment::Compound compound = targeted_exp.getCompounds()[i]; compound_set.push_back(compound.id); group_set.push_back(compound.id); } endProgress(); // OpenSWATH: Group set 
must be unique boost::erase(group_set, boost::unique<boost::return_found_end>(boost::sort(group_set))); // IPF: Loop through all transitions and generate peptidoform data structures progress = 0; std::vector<TransitionPQPReader::TSVTransition > transitions; startProgress(0, targeted_exp.getTransitions().size(), "Convert peptidoforms"); for (Size i = 0; i < targeted_exp.getTransitions().size(); i++) { setProgress(progress++); TransitionPQPReader::TSVTransition transition = convertTransition_(&targeted_exp.getTransitions()[i], targeted_exp); transitions.push_back(transition); std::copy( transition.peptidoforms.begin(), transition.peptidoforms.end(), std::inserter( peptide_set, peptide_set.end() ) ); int group_set_index = std::distance(group_set.begin(),std::find(group_set.begin(), group_set.end(), transition.group_id)); if (precursor_mz_map.find(group_set_index) == precursor_mz_map.end()) { precursor_mz_map[group_set_index] = transition.precursor; } if (precursor_decoy_map.find(group_set_index) == precursor_decoy_map.end()) { if (transition.detecting_transition == 1) { precursor_decoy_map[group_set_index] = transition.decoy; } } } endProgress(); // OpenSWATH: Peptide and compound sets must be unique boost::erase(peptide_set, boost::unique<boost::return_found_end>(boost::sort(peptide_set))); boost::erase(compound_set, boost::unique<boost::return_found_end>(boost::sort(compound_set))); // OpenSWATH: Prepare transition inserts progress = 0; startProgress(0, transitions.size(), String("Prepare ") + transitions.size() + " transitions and mapping"); for (Size i = 0; i < transitions.size(); i++) { setProgress(progress++); TransitionPQPReader::TSVTransition transition = transitions[i]; // IPF: Generate transition-peptide mapping tables (one identification transition can map to multiple peptidoforms) for (Size j = 0; j < transition.peptidoforms.size(); j++) { insert_transition_peptide_mapping_sql << "INSERT INTO TRANSITION_PEPTIDE_MAPPING (TRANSITION_ID, PEPTIDE_ID) VALUES (" 
<< i << "," << std::distance(peptide_set.begin(),std::find(peptide_set.begin(), peptide_set.end(), transition.peptidoforms[j])) << "); "; } // OpenSWATH: Associate transitions with their precursors insert_transition_precursor_mapping_sql << "INSERT INTO TRANSITION_PRECURSOR_MAPPING (TRANSITION_ID, PRECURSOR_ID) VALUES (" << i << "," << std::distance(group_set.begin(), std::find(group_set.begin(), group_set.end(),transition.group_id)) << "); "; std::string transition_charge = "NULL"; // workaround for compounds with missing charge if (transition.fragment_charge != "NA") { transition_charge = transition.fragment_charge; } // OpenSWATH: Insert transition data insert_transition_sql << "INSERT INTO TRANSITION (ID, TRAML_ID, PRODUCT_MZ, CHARGE, TYPE, ORDINAL, DETECTING, IDENTIFYING, QUANTIFYING, LIBRARY_INTENSITY, DECOY) VALUES (" << i << ",'" << transition.transition_name << "'," << transition.product << "," << transition_charge << ",'" << transition.fragment_type<< "'," << transition.fragment_nr << "," << transition.detecting_transition << "," << transition.identifying_transition << "," << transition.quantifying_transition << "," << transition.library_intensity << "," << transition.decoy << "); "; } endProgress(); // OpenSWATH: Prepare protein inserts progress = 0; startProgress(0, targeted_exp.getProteins().size(), "Prepare protein mapping"); for (Size i = 0; i < targeted_exp.getProteins().size(); i++) { setProgress(progress++); OpenMS::TargetedExperiment::Protein protein = targeted_exp.getProteins()[i]; protein_set.push_back(protein.id); } endProgress(); boost::erase(protein_set, boost::unique<boost::return_found_end>(boost::sort(protein_set))); std::stringstream insert_precursor_sql, insert_precursor_peptide_mapping, insert_precursor_compound_mapping; insert_precursor_sql.precision(11); std::vector<std::pair<int, int> > peptide_protein_map; // OpenSWATH: Prepare peptide precursor inserts progress = 0; startProgress(0, targeted_exp.getPeptides().size(), "Prepare 
peptide precursors and mapping"); for (Size i = 0; i < targeted_exp.getPeptides().size(); i++) { setProgress(progress++); OpenMS::TargetedExperiment::Peptide peptide = targeted_exp.getPeptides()[i]; std::string peptide_sequence = TargetedExperimentHelper::getAASequence(peptide).toString(); int group_set_index = std::distance(group_set.begin(),std::find(group_set.begin(), group_set.end(), peptide.id)); int peptide_set_index = std::distance(peptide_set.begin(), std::find(peptide_set.begin(), peptide_set.end(), peptide_sequence)); for (std::vector<String>::iterator it = peptide.protein_refs.begin(); it != peptide.protein_refs.end(); ++it) { int protein_set_index = std::distance(protein_set.begin(),std::find(protein_set.begin(), protein_set.end(), *it)); peptide_protein_map.push_back(std::make_pair(peptide_set_index,protein_set_index)); } insert_precursor_sql << "INSERT INTO PRECURSOR (ID, TRAML_ID, GROUP_LABEL, PRECURSOR_MZ, CHARGE, LIBRARY_INTENSITY, LIBRARY_RT, DECOY) VALUES (" << group_set_index << ",'" << peptide.id << "','" << peptide.getPeptideGroupLabel() << "'," << precursor_mz_map[group_set_index] << "," << peptide.getChargeState() << ",NULL," << peptide.getRetentionTime() << "," << precursor_decoy_map[group_set_index] << "); "; insert_precursor_peptide_mapping << "INSERT INTO PRECURSOR_PEPTIDE_MAPPING (PRECURSOR_ID, PEPTIDE_ID) VALUES (" << group_set_index << "," << peptide_set_index << "); "; } endProgress(); // OpenSWATH: Prepare compound precursor inserts progress = 0; startProgress(0, targeted_exp.getCompounds().size(), "Prepare compound precursors and mapping"); for (Size i = 0; i < targeted_exp.getCompounds().size(); i++) { setProgress(progress++); OpenMS::TargetedExperiment::Compound compound = targeted_exp.getCompounds()[i]; int group_set_index = std::distance(group_set.begin(),std::find(group_set.begin(), group_set.end(), compound.id)); int compound_set_index = std::distance(compound_set.begin(), std::find(compound_set.begin(), compound_set.end(), 
compound.id)); std::string compound_charge = "NULL"; // workaround for compounds with missing charge if (compound.hasCharge()) { compound_charge = String(compound.getChargeState()); } insert_precursor_sql << "INSERT INTO PRECURSOR (ID, TRAML_ID, GROUP_LABEL, PRECURSOR_MZ, CHARGE, LIBRARY_INTENSITY, LIBRARY_RT, DECOY) VALUES (" << group_set_index << ",'" << compound.id << "',NULL," << precursor_mz_map[group_set_index] << "," << compound_charge << ",NULL,NULL" << "," << precursor_decoy_map[group_set_index] << "); "; insert_precursor_compound_mapping << "INSERT INTO PRECURSOR_COMPOUND_MAPPING (PRECURSOR_ID, COMPOUND_ID) VALUES (" << group_set_index << "," << compound_set_index << "); "; } endProgress(); boost::erase(peptide_protein_map, boost::unique<boost::return_found_end>(boost::sort(peptide_protein_map))); // OpenSWATH: Prepare peptide-protein mapping inserts std::stringstream insert_peptide_protein_mapping; progress = 0; startProgress(0, peptide_protein_map.size(), "Prepare peptide - protein mapping"); for (std::vector<std::pair<int, int> >::iterator it = peptide_protein_map.begin(); it != peptide_protein_map.end(); ++it) { setProgress(progress++); insert_peptide_protein_mapping << "INSERT INTO PEPTIDE_PROTEIN_MAPPING (PEPTIDE_ID, PROTEIN_ID) VALUES (" << it->first << "," << it->second << "); "; } endProgress(); // OpenSWATH: Prepare protein inserts std::stringstream insert_protein_sql; progress = 0; startProgress(0, protein_set.size(), String("Prepare ") + protein_set.size() + " proteins"); for (Size i = 0; i < protein_set.size(); i++) { setProgress(progress++); insert_protein_sql << "INSERT INTO PROTEIN (ID, PROTEIN_ACCESSION) VALUES (" << i << ",'" << protein_set[i] << "'); "; } endProgress(); // OpenSWATH: Prepare peptide inserts std::stringstream insert_peptide_sql; progress = 0; startProgress(0, peptide_set.size(), String("Prepare ") + peptide_set.size() + " peptides"); for (std::vector<std::string>::iterator it = peptide_set.begin(); it != 
peptide_set.end(); ++it) { setProgress(progress++); insert_peptide_sql << "INSERT INTO PEPTIDE (ID, UNMODIFIED_SEQUENCE, MODIFIED_SEQUENCE, DECOY) VALUES (" << std::distance(peptide_set.begin(),std::find(peptide_set.begin(), peptide_set.end(),*it)) << ",'" << AASequence::fromString(*it).toUnmodifiedString() << "','" << *it << "'," << 0 <<"); "; } endProgress(); // OpenSWATH: Prepare compound inserts std::stringstream insert_compound_sql; progress = 0; startProgress(0, compound_set.size(), String("Prepare ") + compound_set.size() + " compounds"); for (std::vector<std::string>::iterator it = compound_set.begin(); it != compound_set.end(); ++it) { setProgress(progress++); OpenMS::TargetedExperiment::Compound compound = targeted_exp.getCompoundByRef(*it); insert_compound_sql << "INSERT INTO COMPOUND (ID, COMPOUND_NAME, SUM_FORMULA, SMILES, DECOY) VALUES (" << std::distance(compound_set.begin(),std::find(compound_set.begin(), compound_set.end(),*it)) << ",'" << compound.id << "','" << compound.molecular_formula << "','" << compound.smiles_string << "'," << 0 <<"); "; } endProgress(); std::cout << "Write PQP file" << std::endl; sqlite3_exec(db, "BEGIN TRANSACTION", NULL, NULL, &zErrMsg); // Execute SQL insert statement std::string insert_protein_sql_str = insert_protein_sql.str(); rc = sqlite3_exec(db, insert_protein_sql_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_peptide_protein_mapping_str = insert_peptide_protein_mapping.str(); rc = sqlite3_exec(db, insert_peptide_protein_mapping_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_peptide_sql_str = insert_peptide_sql.str(); rc = sqlite3_exec(db, 
insert_peptide_sql_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_compound_sql_str = insert_compound_sql.str(); rc = sqlite3_exec(db, insert_compound_sql_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_precursor_peptide_mapping_str = insert_precursor_peptide_mapping.str(); rc = sqlite3_exec(db, insert_precursor_peptide_mapping_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_precursor_compound_mapping_str = insert_precursor_compound_mapping.str(); rc = sqlite3_exec(db, insert_precursor_compound_mapping_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_precursor_sql_str = insert_precursor_sql.str(); rc = sqlite3_exec(db, insert_precursor_sql_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_transition_sql_str = insert_transition_sql.str(); rc = sqlite3_exec(db, insert_transition_sql_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_transition_peptide_mapping_sql_str = insert_transition_peptide_mapping_sql.str(); rc = 
sqlite3_exec(db, insert_transition_peptide_mapping_sql_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } // Execute SQL insert statement std::string insert_transition_precursor_mapping_sql_str = insert_transition_precursor_mapping_sql.str(); rc = sqlite3_exec(db, insert_transition_precursor_mapping_sql_str.c_str(), callback, 0, &zErrMsg); if ( rc != SQLITE_OK ) { sqlite3_free(zErrMsg); throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, zErrMsg); } sqlite3_exec(db, "END TRANSACTION", NULL, NULL, &zErrMsg); sqlite3_close(db); }