ExitCodes main_(int argc, const char** argv) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //input/output files String in(getStringOption_("in")), out(getStringOption_("out")); FileHandler fh; FileTypes::Type in_type = fh.getType(in); //------------------------------------------------------------- // loading input //------------------------------------------------------------- PeakMap exp; // keep only MS2 spectra fh.getOptions().addMSLevel(2); fh.loadExperiment(in, exp, in_type, log_type_); writeDebug_(String("Spectra loaded: ") + exp.size(), 2); if (exp.getSpectra().empty()) { throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file."); } // determine type of spectral data (profile or centroided) SpectrumSettings::SpectrumType spectrum_type = exp[0].getType(); if (spectrum_type == SpectrumSettings::RAWDATA) { if (!getFlag_("force")) { throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag."); } } //------------------------------------------------------------- // calculations //------------------------------------------------------------- Param mascot_param = getParam_().copy("Mascot_parameters:", true); MascotGenericFile mgf_file; Param p; // TODO: switch this to mzML (much smaller) p.setValue("internal:format", "Mascot generic", "Sets the format type of the peak list, this should not be changed unless you write the header only.", ListUtils::create<String>("advanced")); p.setValue("internal:HTTP_format", "true", "Write header with MIME boundaries instead of simple key-value pairs. For HTTP submission only.", ListUtils::create<String>("advanced")); p.setValue("internal:content", "all", "Use parameter header + the peak lists with BEGIN IONS... or only one of them.", ListUtils::create<String>("advanced")); mgf_file.setParameters(mascot_param); // get the spectra into string stream writeDebug_("Writing MGF file to stream", 1); stringstream ss; mgf_file.store(ss, in, exp, true); // write in compact format // Usage of a QCoreApplication is overkill here (and ugly too), but we just use the // QEventLoop to process the signals and slots and grab the results afterwards from // the MascotRemotQuery instance char** argv2 = const_cast<char**>(argv); QCoreApplication event_loop(argc, argv2); MascotRemoteQuery* mascot_query = new MascotRemoteQuery(&event_loop); Param mascot_query_param = getParam_().copy("Mascot_server:", true); writeDebug_("Setting parameters for Mascot query", 1); mascot_query->setParameters(mascot_query_param); writeDebug_("Setting spectra for Mascot query", 1); mascot_query->setQuerySpectra(ss.str()); // remove unnecessary spectra ss.clear(); QObject::connect(mascot_query, SIGNAL(done()), &event_loop, SLOT(quit())); QTimer::singleShot(1000, mascot_query, SLOT(run())); writeDebug_("Fire off Mascot query", 1); event_loop.exec(); writeDebug_("Mascot query finished", 1); if (mascot_query->hasError()) { writeLog_("An error occurred during the query: " + mascot_query->getErrorMessage()); delete mascot_query; return EXTERNAL_PROGRAM_ERROR; } // write Mascot response to file String mascot_tmp_file_name(File::getTempDirectory() + "/" + File::getUniqueName() + "_Mascot_response"); QFile mascot_tmp_file(mascot_tmp_file_name.c_str()); mascot_tmp_file.open(QIODevice::WriteOnly); mascot_tmp_file.write(mascot_query->getMascotXMLResponse()); mascot_tmp_file.close(); // clean up delete mascot_query; vector<PeptideIdentification> pep_ids; ProteinIdentification prot_id; // set up mapping between scan numbers and retention times: MascotXMLFile::RTMapping rt_mapping; MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping); // read the response MascotXMLFile().load(mascot_tmp_file_name, prot_id, pep_ids, rt_mapping); writeDebug_("Read " + String(pep_ids.size()) + " peptide ids and " + String(prot_id.getHits().size()) + " protein identifications from Mascot", 5); // for debugging errors relating to unexpected response files if (this->debug_level_ >= 100) { writeDebug_(String("\nMascot Server Response file saved to: '") + mascot_tmp_file_name + "'. If an error occurs, send this file to the OpenMS team.\n", 100); } else { // delete file mascot_tmp_file.remove(); } // keep or delete protein identifications?! vector<ProteinIdentification> prot_ids; if (!getFlag_("keep_protein_links")) { // remove protein links from peptides for (Size i = 0; i < pep_ids.size(); ++i) { std::vector<PeptideHit> hits = pep_ids[i].getHits(); for (Size h = 0; h < hits.size(); ++h) { hits[h].setPeptideEvidences(vector<PeptideEvidence>()); } pep_ids[i].setHits(hits); } // remove proteins std::vector<ProteinHit> p_hit; prot_id.setHits(p_hit); } prot_ids.push_back(prot_id); //------------------------------------------------------------- // writing output //------------------------------------------------------------- IdXMLFile().store(out, prot_ids, pep_ids); return EXECUTION_OK; }
START_SECTION((static bool isSupported(FileTypes::Type type))) FileHandler tmp; TEST_EQUAL(false, tmp.isSupported(FileTypes::UNKNOWN)); TEST_EQUAL(true, tmp.isSupported(FileTypes::DTA)); TEST_EQUAL(true, tmp.isSupported(FileTypes::DTA2D)); TEST_EQUAL(true, tmp.isSupported(FileTypes::MZDATA)); TEST_EQUAL(true, tmp.isSupported(FileTypes::MZML)); TEST_EQUAL(true, tmp.isSupported(FileTypes::MZXML)); TEST_EQUAL(true, tmp.isSupported(FileTypes::XMASS)); TEST_EQUAL(true, tmp.isSupported(FileTypes::FEATUREXML)); END_SECTION START_SECTION((const PeakFileOptions &getOptions() const)) FileHandler a; TEST_EQUAL(a.getOptions().hasMSLevels(), false) END_SECTION START_SECTION((PeakFileOptions & getOptions())) FileHandler a; a.getOptions().addMSLevel(1); TEST_EQUAL(a.getOptions().hasMSLevels(), true); END_SECTION START_SECTION((template <class FeatureType> bool loadFeatures(const String &filename, FeatureMap<FeatureType>&map, FileTypes::Type force_type = FileTypes::UNKNOWN))) FileHandler tmp; FeatureMap map; TEST_EQUAL(tmp.loadFeatures("test.bla", map), false) TEST_EQUAL(tmp.loadFeatures(OPENMS_GET_TEST_DATA_PATH("FeatureXMLFile_2_options.featureXML"), map), true) TEST_EQUAL(map.size(), 7); TEST_EQUAL(tmp.loadFeatures(OPENMS_GET_TEST_DATA_PATH("FeatureXMLFile_2_options.featureXML"), map), true)
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // general variables and data //------------------------------------------------------------- FileHandler fh; vector<PeptideIdentification> peptide_identifications; vector<ProteinIdentification> protein_identifications; //------------------------------------------------------------- // reading input //------------------------------------------------------------- const String in = getStringOption_("in"); ProgressLogger logger; logger.setLogType(ProgressLogger::CMD); logger.startProgress(0, 1, "Loading..."); if (File::isDirectory(in)) { const String in_directory = File::absolutePath(in).ensureLastChar('/'); const String mz_file = getStringOption_("mz_file"); const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide"); UInt i = 0; FileHandler fh; FileTypes::Type type; MSExperiment<Peak1D> msexperiment; // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;) However, now String::toInt() might throw. map<Int, float> num_and_rt; vector<String> NativeID; // The mz-File (if given) if (!mz_file.empty()) { type = fh.getTypeByFileName(mz_file); fh.loadExperiment(mz_file, msexperiment, type); for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it) { String(spectra_it->getNativeID()).split('=', NativeID); try { num_and_rt[NativeID[1].toInt()] = spectra_it->getRT(); // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01 } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage()); } } } // Get list of the actual Sequest .out-Files StringList in_files; if (!File::fileList(in_directory, String("*.out"), in_files)) { writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!"); } // Now get to work ... for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it) { vector<PeptideIdentification> peptide_ids_seq; ProteinIdentification protein_id_seq; vector<double> pvalues_seq; vector<String> in_file_vec; SequestOutfile sequest_outfile; writeDebug_(String("Reading file ") + *in_files_it, 3); try { sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide); in_files_it->split('.', in_file_vec); for (Size j = 0; j < peptide_ids_seq.size(); ++j) { // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files. peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i); Int scan_number = 0; if (!mz_file.empty()) { try { scan_number = in_file_vec[2].toInt(); peptide_ids_seq[j].setRT(num_and_rt[scan_number]); } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage()); } catch (exception& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.what()); } //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? semantics of mz const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge(); peptide_ids_seq[j].setMZ(real_mz); } writeDebug_(String("scan: ") + String(scan_number) + String(" RT: ") + String(peptide_ids_seq[j].getRT()) + " MZ: " + String(peptide_ids_seq[j].getMZ()) + " Ident: " + peptide_ids_seq[j].getIdentifier(), 4); peptide_identifications.push_back(peptide_ids_seq[j]); } protein_id_seq.setIdentifier(*in_files_it + "_" + i); protein_identifications.push_back(protein_id_seq); ++i; } catch (Exception::ParseError& pe) { writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")"); throw; } catch (...) { writeLog_(String("Error reading file: ") + *in_files_it); throw; } } writeDebug_("All files processed.", 3); } // ! directory else { FileTypes::Type in_type = fh.getType(in); if (in_type == FileTypes::PEPXML) { String exp_name = getStringOption_("mz_file"); String orig_name = getStringOption_("mz_name"); bool use_precursor_data = getFlag_("use_precursor_data"); if (exp_name.empty()) { PepXMLFile().load(in, protein_identifications, peptide_identifications, orig_name); } else { MSExperiment<> exp; fh.loadExperiment(exp_name, exp); if (!orig_name.empty()) { exp_name = orig_name; } PepXMLFile().load(in, protein_identifications, peptide_identifications, exp_name, exp, use_precursor_data); } } else if (in_type == FileTypes::IDXML) { IdXMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::MZIDENTML) { LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl; MzIdentMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::PROTXML) { protein_identifications.resize(1); peptide_identifications.resize(1); ProtXMLFile().load(in, protein_identifications[0], peptide_identifications[0]); } else if (in_type == FileTypes::OMSSAXML) { protein_identifications.resize(1); OMSSAXMLFile().load(in, protein_identifications[0], peptide_identifications, true); } else if (in_type == FileTypes::MASCOTXML) { String scan_regex = getStringOption_("scan_regex"); String exp_name = getStringOption_("mz_file"); MascotXMLFile::RTMapping rt_mapping; if (!exp_name.empty()) { PeakMap exp; // load only MS2 spectra: fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping); } protein_identifications.resize(1); MascotXMLFile().load(in, protein_identifications[0], peptide_identifications, rt_mapping, scan_regex); } else if (in_type == FileTypes::XML) { ProteinIdentification protein_id; XTandemXMLFile().load(in, protein_id, peptide_identifications); protein_id.setSearchEngineVersion(""); protein_id.setSearchEngine("XTandem"); protein_identifications.push_back(protein_id); String exp_name = getStringOption_("mz_file"); if (!exp_name.empty()) { PeakMap exp; fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it) { UInt id = (Int)it->getMetaValue("spectrum_id"); --id; // native IDs were written 1-based if (id < exp.size()) { it->setRT(exp[id].getRT()); double pre_mz(0.0); if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ(); it->setMZ(pre_mz); it->removeMetaValue("spectrum_id"); } else { LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" << endl; } } } } else { writeLog_("Unknown input file type given. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } } logger.endProgress(); //------------------------------------------------------------- // writing output //------------------------------------------------------------- const String out = getStringOption_("out"); FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type")); if (out_type == FileTypes::UNKNOWN) { out_type = fh.getTypeByFileName(out); } if (out_type == FileTypes::UNKNOWN) { writeLog_("Error: Could not determine output file type!"); return PARSE_ERROR; } logger.startProgress(0, 1, "Storing..."); if (out_type == FileTypes::PEPXML) { bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed"); String mz_file = getStringOption_("mz_file"); String mz_name = getStringOption_("mz_name"); PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed); } else if (out_type == FileTypes::IDXML) { IdXMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::MZIDENTML) { MzIdentMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::FASTA) { Size count = 0; ofstream fasta(out.c_str(), ios::out); for (Size i = 0; i < peptide_identifications.size(); ++i) { for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l) { const PeptideHit& hit = peptide_identifications[i].getHits()[l]; fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++ << "|" << hit.getSequence().toString() << endl; String seq = hit.getSequence().toUnmodifiedString(); // FASTA files should have at most 60 characters of sequence info per line for (Size j = 0; j < seq.size(); j += 60) { Size k = min(j + 60, seq.size()); fasta << string(seq[j], seq[k]) << endl; } } } } else { writeLog_("Unsupported output file type given. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } logger.endProgress(); return EXECUTION_OK; }