int ProgressLogger::get_depth() const
{
  int depth = 1;
  for (ProgressLogger* i = get_subtask(); i != NULL; i = i->get_subtask())
    depth += 1;
  return depth;
}
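// Editor's note: a minimal standalone sketch of the same pointer walk, for
// illustration only. 'Node' and 'depth()' are hypothetical stand-ins for
// ProgressLogger and get_depth(): a logger without a subtask has depth 1,
// and each nested subtask adds one level.
#include <cassert>

struct Node
{
  Node* subtask = nullptr;
  int depth() const
  {
    int d = 1;
    for (const Node* i = subtask; i != nullptr; i = i->subtask)
      d += 1;
    return d;
  }
};

int main()
{
  Node leaf, mid, root;
  mid.subtask = &leaf;
  root.subtask = &mid;
  assert(root.depth() == 3 && mid.depth() == 2 && leaf.depth() == 1);
  return 0;
}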
void storeTransformationDescriptions_(const vector<TransformationDescription>& transformations, StringList& trafos)
{
  // custom progress logger for this task:
  ProgressLogger progresslogger;
  progresslogger.setLogType(log_type_);
  progresslogger.startProgress(0, trafos.size(), "writing transformation files");
  for (Size i = 0; i < transformations.size(); ++i)
  {
    progresslogger.setProgress(i); // advance progress (was missing, so the bar never moved)
    TransformationXMLFile().store(trafos[i], transformations[i]);
  }
  progresslogger.endProgress();
}
void loadInitialMaps_(vector<MapType>& maps, StringList& ins, FileType& input_file)
{
  // custom progress logger for this task:
  ProgressLogger progresslogger;
  progresslogger.setLogType(TOPPMapAlignerBase::log_type_);
  progresslogger.startProgress(0, ins.size(), "loading input files");
  for (Size i = 0; i < ins.size(); ++i)
  {
    progresslogger.setProgress(i);
    input_file.load(ins[i], maps[i]);
  }
  progresslogger.endProgress();
}
void storeTransformedMaps_(vector<MapType>& maps, StringList& outs, FileType& output_file)
{
  // custom progress logger for this task:
  ProgressLogger progresslogger;
  progresslogger.setLogType(log_type_);
  progresslogger.startProgress(0, outs.size(), "writing output files");
  for (Size i = 0; i < outs.size(); ++i)
  {
    progresslogger.setProgress(i);
    // annotate output with data processing info:
    addDataProcessing_(maps[i], getProcessingInfo_(DataProcessing::ALIGNMENT));
    output_file.store(outs[i], maps[i]);
  }
  progresslogger.endProgress();
}
void ConsensusMapNormalizerAlgorithmThreshold::normalizeMaps(ConsensusMap& map, const vector<double>& ratios)
{
  ConsensusMap::Iterator cf_it;
  ProgressLogger progresslogger;
  progresslogger.setLogType(ProgressLogger::CMD);
  progresslogger.startProgress(0, map.size(), "normalizing maps");
  for (cf_it = map.begin(); cf_it != map.end(); ++cf_it)
  {
    progresslogger.setProgress(cf_it - map.begin());
    ConsensusFeature::HandleSetType::const_iterator f_it;
    for (f_it = cf_it->getFeatures().begin(); f_it != cf_it->getFeatures().end(); ++f_it)
    {
      f_it->asMutable().setIntensity(f_it->getIntensity() * ratios[f_it->getMapIndex()]);
    }
  }
  progresslogger.endProgress();
}
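// Editor's note: how the 'ratios' vector is derived is outside this function.
// Below is one plausible, purely illustrative construction, NOT what
// ConsensusMapNormalizerAlgorithmThreshold actually computes: scale every map
// onto the intensity level of map 0 using per-map reference intensities such
// as medians. 'makeRatios' and 'map_medians' are hypothetical names.
#include <cstddef>
#include <vector>

std::vector<double> makeRatios(const std::vector<double>& map_medians)
{
  std::vector<double> ratios(map_medians.size(), 1.0);
  for (std::size_t m = 0; m < map_medians.size(); ++m)
  {
    if (map_medians[m] > 0.0)
      ratios[m] = map_medians[0] / map_medians[m]; // bring map m onto map 0's scale
  }
  return ratios;
}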
void TLJPak::scan(const std::vector<std::string>& lst, ProgressLogger& logger)
{
  std::map<int, std::string> entry2name;
  for (std::vector<std::string>::const_iterator i = lst.begin(); i != lst.end(); ++i)
  {
    int j = lookup(*i);
    if (j != -1)
      files[j].pathname = *i;
  }

  logger.set_task_size(files.size());
  for (int i = 0; i < int(files.size()); ++i)
  {
    if (files[i].is_file() && files[i].pathname.empty())
    {
      // FIXME: Add filtering somewhere
      files[i].guesses = guess(i);
      assert(files[i].guesses.size() > 0);
      files[i].pathname = files[i].guesses.front();
      logger.println("found " + files[i].pathname + " (guessed)");
    }
    /* too much of a speed impact
    else if (!files[i].pathname.empty())
    {
      logger.println("found " + files[i].pathname);
    }
    */
    files[i].filetype = get_type(i);
    logger.set_task_status(i);
    logger.sync();
  }
}
ExitCodes main_(int, const char**)
{
  // parsing parameters
  String in(getStringOption_("in"));
  String feature_in(getStringOption_("feature_in"));
  String out(getStringOption_("out"));
  double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

  // reading input
  FileHandler fh;
  FileTypes::Type in_type = fh.getType(in);
  PeakMap exp;
  fh.loadExperiment(in, exp, in_type, log_type_, false, false);
  exp.sortSpectra();

  FeatureMap feature_map;
  if (feature_in != "")
  {
    FeatureXMLFile().load(feature_in, feature_map);
  }

  // calculations
  FeatureFinderAlgorithmIsotopeWavelet iso_ff;
  Param ff_param(iso_ff.getParameters());
  ff_param.setValue("max_charge", getIntOption_("max_charge"));
  ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
  iso_ff.setParameters(ff_param);

  FeatureFinder ff;
  ff.setLogType(ProgressLogger::NONE);

  // drop empty spectra:
  PeakMap exp2 = exp;
  exp2.clear(false);
  for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
  {
    if (it->size() != 0)
    {
      exp2.addSpectrum(*it);
    }
  }
  exp = exp2;
  exp.updateRanges();

  // TODO check MS2 and MS1 counts
  ProgressLogger progresslogger;
  progresslogger.setLogType(log_type_);
  progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
  for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
  {
    progresslogger.setProgress(it - exp.begin()); // was 'exp.end() - it', which counted backwards
    if (it->getMSLevel() != 2)
    {
      continue;
    }
    // find the first MS1 scan preceding the MS/MS scan
    PeakMap::Iterator ms1_it = it;
    while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
    {
      --ms1_it;
    }
    if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
    {
      writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
      continue;
    }
    if (ms1_it->size() == 0)
    {
      writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
      continue;
    }
    PeakMap::Iterator ms2_it = ms1_it;
    ++ms2_it;
    while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
    {
      // first: error checks
      if (ms2_it->getPrecursors().empty())
      {
        writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
        ++ms2_it;
        continue;
      }
      else if (ms2_it->getPrecursors().size() > 1)
      {
        writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
      }

      Precursor prec = *ms2_it->getPrecursors().begin();
      double prec_pos = prec.getMZ();
      PeakMap new_exp;
      // now excise a small region from the MS1 spectrum for the feature finder
      // (the isotope pattern must be covered ...)
      PeakSpectrum zoom_spec;
      for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
      {
        if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3)
        {
          zoom_spec.push_back(*pit);
        }
      }
      new_exp.addSpectrum(zoom_spec);
      new_exp.updateRanges();
      FeatureMap features, seeds;
      ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
      if (features.empty())
      {
        writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
        ++ms2_it;
        continue;
      }

      // pick the most intense feature within the precursor mass tolerance:
      double max_int(numeric_limits<double>::min());
      double min_dist(numeric_limits<double>::max());
      Size max_int_feat_idx(0);
      for (Size i = 0; i != features.size(); ++i)
      {
        if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance && features[i].getIntensity() > max_int)
        {
          max_int_feat_idx = i;
          max_int = features[i].getIntensity();
          min_dist = fabs(features[i].getMZ() - prec_pos);
        }
      }
      writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
      if (min_dist < precursor_mass_tolerance)
      {
        prec.setMZ(features[max_int_feat_idx].getMZ());
        prec.setCharge(features[max_int_feat_idx].getCharge());
        vector<Precursor> precs;
        precs.push_back(prec);
        ms2_it->setPrecursors(precs);
        writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
      }
      ++ms2_it;
    }
    it = --ms2_it;
  }
  progresslogger.endProgress();

  // writing output
  fh.storeExperiment(out, exp, log_type_);

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  String trafo_in = getStringOption_("trafo_in");
  String trafo_out = getStringOption_("trafo_out");
  Param model_params = getParam_().copy("model:", true);
  String model_type = model_params.getValue("type");
  model_params = model_params.copy(model_type + ":", true);

  ProgressLogger progresslogger;
  progresslogger.setLogType(log_type_);

  //-------------------------------------------------------------
  // check for valid input
  //-------------------------------------------------------------
  if (out.empty() && trafo_out.empty())
  {
    writeLog_("Error: Either a data or a transformation output file has to be provided (parameters 'out'/'trafo_out')");
    return ILLEGAL_PARAMETERS;
  }
  if (in.empty() != out.empty())
  {
    writeLog_("Error: Data input and output parameters ('in'/'out') must be used together");
    return ILLEGAL_PARAMETERS;
  }

  //-------------------------------------------------------------
  // apply transformation
  //-------------------------------------------------------------
  TransformationXMLFile trafoxml;
  TransformationDescription trafo;
  trafoxml.load(trafo_in, trafo);
  if (model_type != "none")
  {
    trafo.fitModel(model_type, model_params);
  }
  if (getFlag_("invert"))
  {
    trafo.invert();
  }
  if (!trafo_out.empty())
  {
    trafoxml.store(trafo_out, trafo);
  }
  if (!in.empty()) // load input
  {
    FileTypes::Type in_type = FileHandler::getType(in);
    if (in_type == FileTypes::MZML)
    {
      MzMLFile file;
      MSExperiment<> map;
      applyTransformation_(in, out, trafo, file, map);
    }
    else if (in_type == FileTypes::FEATUREXML)
    {
      FeatureXMLFile file;
      FeatureMap map;
      applyTransformation_(in, out, trafo, file, map);
    }
    else if (in_type == FileTypes::CONSENSUSXML)
    {
      ConsensusXMLFile file;
      ConsensusMap map;
      applyTransformation_(in, out, trafo, file, map);
    }
    else if (in_type == FileTypes::IDXML)
    {
      IdXMLFile file;
      vector<ProteinIdentification> proteins;
      vector<PeptideIdentification> peptides;
      file.load(in, proteins, peptides);
      bool store_original_rt = getFlag_("store_original_rt");
      MapAlignmentTransformer::transformRetentionTimes(peptides, trafo, store_original_rt);
      // no "data processing" section in idXML
      file.store(out, proteins, peptides);
    }
  }

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**)
{
  ExitCodes return_code = TOPPMapAlignerBase::checkParameters_();
  if (return_code != EXECUTION_OK)
    return return_code;

  // set up alignment algorithm:
  MapAlignmentAlgorithmIdentification algorithm;
  Param algo_params = getParam_().copy("algorithm:", true);
  algorithm.setParameters(algo_params);
  algorithm.setLogType(log_type_);
  Int reference_index = getReference_(algorithm);

  // handle in- and output files:
  StringList input_files = getStringList_("in");
  StringList output_files = getStringList_("out");
  StringList trafo_files = getStringList_("trafo_out");
  FileTypes::Type in_type = FileHandler::getType(input_files[0]);
  vector<TransformationDescription> transformations;

  //-------------------------------------------------------------
  // perform feature alignment
  //-------------------------------------------------------------
  if (in_type == FileTypes::FEATUREXML)
  {
    vector<FeatureMap> feature_maps(input_files.size());
    FeatureXMLFile fxml_file;
    if (output_files.empty())
    {
      // store only transformation descriptions, not transformed data =>
      // we can load only minimum required information:
      fxml_file.getOptions().setLoadConvexHull(false);
      fxml_file.getOptions().setLoadSubordinates(false);
    }
    loadInitialMaps_(feature_maps, input_files, fxml_file);
    performAlignment_(algorithm, feature_maps, transformations, reference_index);
    if (!output_files.empty())
    {
      storeTransformedMaps_(feature_maps, output_files, fxml_file);
    }
  }
  //-------------------------------------------------------------
  // perform consensus alignment
  //-------------------------------------------------------------
  else if (in_type == FileTypes::CONSENSUSXML)
  {
    std::vector<ConsensusMap> consensus_maps(input_files.size());
    ConsensusXMLFile cxml_file;
    loadInitialMaps_(consensus_maps, input_files, cxml_file);
    performAlignment_(algorithm, consensus_maps, transformations, reference_index);
    if (!output_files.empty())
    {
      storeTransformedMaps_(consensus_maps, output_files, cxml_file);
    }
  }
  //-------------------------------------------------------------
  // perform peptide alignment
  //-------------------------------------------------------------
  else if (in_type == FileTypes::IDXML)
  {
    vector<vector<ProteinIdentification> > protein_ids(input_files.size());
    vector<vector<PeptideIdentification> > peptide_ids(input_files.size());
    IdXMLFile idxml_file;
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);
    progresslogger.startProgress(0, input_files.size(), "loading input files");
    for (Size i = 0; i < input_files.size(); ++i)
    {
      progresslogger.setProgress(i);
      idxml_file.load(input_files[i], protein_ids[i], peptide_ids[i]);
    }
    progresslogger.endProgress();
    performAlignment_(algorithm, peptide_ids, transformations, reference_index);
    if (!output_files.empty())
    {
      progresslogger.startProgress(0, output_files.size(), "writing output files");
      for (Size i = 0; i < output_files.size(); ++i)
      {
        progresslogger.setProgress(i);
        idxml_file.store(output_files[i], protein_ids[i], peptide_ids[i]);
      }
      progresslogger.endProgress();
    }
  }

  if (!trafo_files.empty())
  {
    storeTransformationDescriptions_(transformations, trafo_files);
  }

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) override
{
  ExitCodes ret = TOPPMapAlignerBase::checkParameters_();
  if (ret != EXECUTION_OK)
    return ret;

  MapAlignmentAlgorithmPoseClustering algorithm;
  Param algo_params = getParam_().copy("algorithm:", true);
  algorithm.setParameters(algo_params);
  algorithm.setLogType(log_type_);

  StringList in_files = getStringList_("in");
  StringList out_files = getStringList_("out");
  StringList out_trafos = getStringList_("trafo_out");

  Size reference_index = getIntOption_("reference:index");
  String reference_file = getStringOption_("reference:file");

  FileTypes::Type in_type = FileHandler::getType(in_files[0]);
  String file;
  if (!reference_file.empty())
  {
    file = reference_file;
    reference_index = in_files.size(); // points to invalid index
  }
  else if (reference_index > 0) // normal reference (index was checked before)
  {
    file = in_files[--reference_index]; // ref. index is 1-based in parameters, but should be 0-based here
  }
  else if (reference_index == 0) // no reference given
  {
    LOG_INFO << "Picking a reference (by size) ..." << std::flush;
    // use map with highest number of features as reference:
    Size max_count(0);
    FeatureXMLFile f;
    for (Size i = 0; i < in_files.size(); ++i)
    {
      Size s = 0;
      if (in_type == FileTypes::FEATUREXML)
      {
        s = f.loadSize(in_files[i]);
      }
      else if (in_type == FileTypes::MZML) // this is expensive!
      {
        PeakMap exp;
        MzMLFile().load(in_files[i], exp);
        exp.updateRanges(1);
        s = exp.getSize();
      }
      if (s > max_count)
      {
        max_count = s;
        reference_index = i;
      }
    }
    LOG_INFO << " done" << std::endl;
    file = in_files[reference_index];
  }

  FeatureXMLFile f_fxml;
  if (out_files.empty()) // no need to store featureXML, thus we can load only minimum required information
  {
    f_fxml.getOptions().setLoadConvexHull(false);
    f_fxml.getOptions().setLoadSubordinates(false);
  }

  if (in_type == FileTypes::FEATUREXML)
  {
    FeatureMap map_ref;
    FeatureXMLFile f_fxml_tmp; // for the reference, we never need CH or subordinates
    f_fxml_tmp.getOptions().setLoadConvexHull(false);
    f_fxml_tmp.getOptions().setLoadSubordinates(false);
    f_fxml_tmp.load(file, map_ref);
    algorithm.setReference(map_ref);
  }
  else if (in_type == FileTypes::MZML)
  {
    PeakMap map_ref;
    MzMLFile().load(file, map_ref);
    algorithm.setReference(map_ref);
  }

  ProgressLogger plog;
  plog.setLogType(log_type_);
  plog.startProgress(0, in_files.size(), "Aligning input maps");
  Size progress(0); // thread-safe progress

  // TODO: it should all work on featureXML files, since we might need them for
  // output anyway. Converting to consensusXML is just wasting memory!
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
  for (int i = 0; i < static_cast<int>(in_files.size()); ++i)
  {
    TransformationDescription trafo;
    if (in_type == FileTypes::FEATUREXML)
    {
      FeatureMap map;
      // workaround for loading: use temporary FeatureXMLFile since it is not thread-safe
      FeatureXMLFile f_fxml_tmp; // do not use OMP-firstprivate, since FeatureXMLFile has no copy c'tor
      f_fxml_tmp.getOptions() = f_fxml.getOptions();
      f_fxml_tmp.load(in_files[i], map);
      if (i == static_cast<int>(reference_index))
        trafo.fitModel("identity");
      else
        algorithm.align(map, trafo);
      if (out_files.size())
      {
        MapAlignmentTransformer::transformRetentionTimes(map, trafo);
        // annotate output with data processing info
        addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT));
        f_fxml_tmp.store(out_files[i], map);
      }
    }
    else if (in_type == FileTypes::MZML)
    {
      PeakMap map;
      MzMLFile().load(in_files[i], map);
      if (i == static_cast<int>(reference_index))
        trafo.fitModel("identity");
      else
        algorithm.align(map, trafo);
      if (out_files.size())
      {
        MapAlignmentTransformer::transformRetentionTimes(map, trafo);
        // annotate output with data processing info
        addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT));
        MzMLFile().store(out_files[i], map);
      }
    }
    if (!out_trafos.empty())
    {
      TransformationXMLFile().store(out_trafos[i], trafo);
    }

#ifdef _OPENMP
#pragma omp critical (MAPose_Progress)
#endif
    {
      plog.setProgress(++progress); // thread safe progress counter
    }
  }
  plog.endProgress();

  return EXECUTION_OK;
}
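// Editor's note: the progress reporting above relies on a named OpenMP
// critical section around a shared counter. A minimal standalone sketch of
// that pattern (illustration only; 'progress_update' is an arbitrary name,
// and the printf stands in for plog.setProgress()):
#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif

int main()
{
  const int n = 100;
  int progress = 0; // shared counter, guarded below
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
  for (int i = 0; i < n; ++i)
  {
    // ... per-file work would happen here ...
#ifdef _OPENMP
#pragma omp critical (progress_update)
#endif
    {
      ++progress; // only one thread at a time updates and reports
      std::printf("\r%d/%d", progress, n);
    }
  }
  std::printf("\n");
  return 0;
}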
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parsing parameters
  //-------------------------------------------------------------
  String in(getStringOption_("in"));
  String out(getStringOption_("out"));
  Size num_spots_per_row(getIntOption_("num_spots_per_row"));
  double RT_distance(getDoubleOption_("RT_distance"));

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  PeakMap exp;
  MzMLFile f;
  f.setLogType(log_type_);
  f.load(in, exp);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------
  ProgressLogger pl;
  pl.setLogType(log_type_);
  pl.startProgress(0, exp.size(), "Assigning pseudo RTs.");
  Size num_ms1(0), num_ms1_base(0), row_counter(0);
  bool row_to_reverse(false);
  double actual_RT(0);
  for (Size i = 0; i != exp.size(); ++i)
  {
    pl.setProgress(i);
    if (row_to_reverse)
    {
      actual_RT = (double)(num_ms1_base + (num_spots_per_row - row_counter)) * RT_distance;
      writeDebug_("RT=" + String(actual_RT) + " (modified, row_counter=" + String(row_counter) + ")", 1);
    }
    else
    {
      actual_RT = (double)num_ms1 * RT_distance;
      writeDebug_("RT=" + String(actual_RT), 1);
    }

    exp[i].setRT(actual_RT);

    if (exp[i].getMSLevel() == 1)
    {
      if (++row_counter >= num_spots_per_row)
      {
        row_counter = 0;
        row_to_reverse = !row_to_reverse;
      }
      ++num_ms1;
      if (!row_to_reverse)
      {
        num_ms1_base = num_ms1;
      }
    }
  }
  pl.endProgress();

  // sort the spectra according to their new RT
  exp.sortSpectra();

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  f.store(out, exp);

  return EXECUTION_OK;
}
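// Editor's note: to see what the serpentine bookkeeping above produces, here
// is a reduced standalone sketch in which every spectrum counts as MS1, with
// 4 spots per row and 10 s spacing. It prints RTs 0,10,20,30 for the first
// row and 70,60,50,40 for the reversed second row, so after sortSpectra()
// the second row is traversed backwards (a snake path over the plate).
#include <cstdio>

int main()
{
  const int num_spots_per_row = 4;
  const double RT_distance = 10.0;
  int num_ms1 = 0, num_ms1_base = 0, row_counter = 0;
  bool row_to_reverse = false;
  for (int i = 0; i < 8; ++i) // 8 spots == 2 rows
  {
    double rt = row_to_reverse
        ? (num_ms1_base + (num_spots_per_row - row_counter)) * RT_distance
        : num_ms1 * RT_distance;
    std::printf("spot %d -> RT %.0f\n", i, rt);
    if (++row_counter >= num_spots_per_row)
    {
      row_counter = 0;
      row_to_reverse = !row_to_reverse;
    }
    ++num_ms1;
    if (!row_to_reverse)
      num_ms1_base = num_ms1;
  }
  return 0;
}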
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // general variables and data
  //-------------------------------------------------------------
  FileHandler fh;
  vector<PeptideIdentification> peptide_identifications;
  vector<ProteinIdentification> protein_identifications;

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  const String in = getStringOption_("in");

  ProgressLogger logger;
  logger.setLogType(ProgressLogger::CMD);
  logger.startProgress(0, 1, "Loading...");

  if (File::isDirectory(in))
  {
    const String in_directory = File::absolutePath(in).ensureLastChar('/');
    const String mz_file = getStringOption_("mz_file");
    const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide");

    UInt i = 0;
    FileHandler fh;
    FileTypes::Type type;
    MSExperiment<Peak1D> msexperiment;
    // Note: we had issues with leading zeroes, so let us represent scan numbers
    // as Int (the next line used to be 'map<String, float> num_and_rt;').
    // However, now String::toInt() might throw.
    map<Int, float> num_and_rt;
    vector<String> NativeID;

    // the mz-file (if given)
    if (!mz_file.empty())
    {
      type = fh.getTypeByFileName(mz_file);
      fh.loadExperiment(mz_file, msexperiment, type);
      for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it)
      {
        String(spectra_it->getNativeID()).split('=', NativeID);
        try
        {
          num_and_rt[NativeID[1].toInt()] = spectra_it->getRT();
          // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debugging 2009-07-01
        }
        catch (Exception::ConversionError& e)
        {
          writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
        }
      }
    }

    // get list of the actual Sequest .out files
    StringList in_files;
    if (!File::fileList(in_directory, String("*.out"), in_files))
    {
      writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!");
    }

    // now get to work ...
    for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it)
    {
      vector<PeptideIdentification> peptide_ids_seq;
      ProteinIdentification protein_id_seq;
      vector<double> pvalues_seq;
      vector<String> in_file_vec;

      SequestOutfile sequest_outfile;
      writeDebug_(String("Reading file ") + *in_files_it, 3);

      try
      {
        sequest_outfile.load((String)(in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide);
        in_files_it->split('.', in_file_vec);

        for (Size j = 0; j < peptide_ids_seq.size(); ++j)
        {
          // We have to set the identifiers explicitly, because the automatically
          // generated ones are composed of search engine name and date, which is
          // the same for a whole batch of Sequest .out files.
          peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i);

          Int scan_number = 0;
          if (!mz_file.empty())
          {
            try
            {
              scan_number = in_file_vec[2].toInt();
              peptide_ids_seq[j].setRT(num_and_rt[scan_number]);
            }
            catch (Exception::ConversionError& e)
            {
              writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
            }
            catch (exception& e)
            {
              writeLog_(String("Error: Cannot read scan number as integer. '") + e.what());
            }
            // double real_mz = (peptide_ids_seq[j].getMZ() - hydrogen_mass) / (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? semantics of mz
            const double real_mz = peptide_ids_seq[j].getMZ() / (double)peptide_ids_seq[j].getHits()[0].getCharge();
            peptide_ids_seq[j].setMZ(real_mz);
          }
          writeDebug_(String("scan: ") + String(scan_number) + String(" RT: ") + String(peptide_ids_seq[j].getRT()) + " MZ: " + String(peptide_ids_seq[j].getMZ()) + " Ident: " + peptide_ids_seq[j].getIdentifier(), 4);

          peptide_identifications.push_back(peptide_ids_seq[j]);
        }
        protein_id_seq.setIdentifier(*in_files_it + "_" + i);
        protein_identifications.push_back(protein_id_seq);
        ++i;
      }
      catch (Exception::ParseError& pe)
      {
        writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")");
        throw;
      }
      catch (...)
      {
        writeLog_(String("Error reading file: ") + *in_files_it);
        throw;
      }
    }
    writeDebug_("All files processed.", 3);
  } // ! directory
  else
  {
    FileTypes::Type in_type = fh.getType(in);

    if (in_type == FileTypes::PEPXML)
    {
      String exp_name = getStringOption_("mz_file");
      String orig_name = getStringOption_("mz_name");
      bool use_precursor_data = getFlag_("use_precursor_data");

      if (exp_name.empty())
      {
        PepXMLFile().load(in, protein_identifications, peptide_identifications, orig_name);
      }
      else
      {
        MSExperiment<> exp;
        fh.loadExperiment(exp_name, exp);
        if (!orig_name.empty())
        {
          exp_name = orig_name;
        }
        PepXMLFile().load(in, protein_identifications, peptide_identifications, exp_name, exp, use_precursor_data);
      }
    }
    else if (in_type == FileTypes::IDXML)
    {
      IdXMLFile().load(in, protein_identifications, peptide_identifications);
    }
    else if (in_type == FileTypes::MZIDENTML)
    {
      LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl;
      MzIdentMLFile().load(in, protein_identifications, peptide_identifications);
    }
    else if (in_type == FileTypes::PROTXML)
    {
      protein_identifications.resize(1);
      peptide_identifications.resize(1);
      ProtXMLFile().load(in, protein_identifications[0], peptide_identifications[0]);
    }
    else if (in_type == FileTypes::OMSSAXML)
    {
      protein_identifications.resize(1);
      OMSSAXMLFile().load(in, protein_identifications[0], peptide_identifications, true);
    }
    else if (in_type == FileTypes::MASCOTXML)
    {
      String scan_regex = getStringOption_("scan_regex");
      String exp_name = getStringOption_("mz_file");
      MascotXMLFile::RTMapping rt_mapping;
      if (!exp_name.empty())
      {
        PeakMap exp;
        // load only MS2 spectra:
        fh.getOptions().addMSLevel(2);
        fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
        MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);
      }
      protein_identifications.resize(1);
      MascotXMLFile().load(in, protein_identifications[0], peptide_identifications, rt_mapping, scan_regex);
    }
    else if (in_type == FileTypes::XML)
    {
      ProteinIdentification protein_id;
      XTandemXMLFile().load(in, protein_id, peptide_identifications);
      protein_id.setSearchEngineVersion("");
      protein_id.setSearchEngine("XTandem");
      protein_identifications.push_back(protein_id);
      String exp_name = getStringOption_("mz_file");
      if (!exp_name.empty())
      {
        PeakMap exp;
        fh.getOptions().addMSLevel(2);
        fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
        for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it)
        {
          UInt id = (Int)it->getMetaValue("spectrum_id");
          --id; // native IDs were written 1-based
          if (id < exp.size())
          {
            it->setRT(exp[id].getRT());
            double pre_mz(0.0);
            if (!exp[id].getPrecursors().empty())
              pre_mz = exp[id].getPrecursors()[0].getMZ();
            it->setMZ(pre_mz);
            it->removeMetaValue("spectrum_id");
          }
          else
          {
            LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" << endl;
          }
        }
      }
    }
    else
    {
      writeLog_("Unknown input file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }
  }
  logger.endProgress();

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  const String out = getStringOption_("out");
  FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
  if (out_type == FileTypes::UNKNOWN)
  {
    out_type = fh.getTypeByFileName(out);
  }
  if (out_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine output file type!");
    return PARSE_ERROR;
  }

  logger.startProgress(0, 1, "Storing...");
  if (out_type == FileTypes::PEPXML)
  {
    bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed");
    String mz_file = getStringOption_("mz_file");
    String mz_name = getStringOption_("mz_name");
    PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed);
  }
  else if (out_type == FileTypes::IDXML)
  {
    IdXMLFile().store(out, protein_identifications, peptide_identifications);
  }
  else if (out_type == FileTypes::MZIDENTML)
  {
    MzIdentMLFile().store(out, protein_identifications, peptide_identifications);
  }
  else if (out_type == FileTypes::FASTA)
  {
    Size count = 0;
    ofstream fasta(out.c_str(), ios::out);
    for (Size i = 0; i < peptide_identifications.size(); ++i)
    {
      for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l)
      {
        const PeptideHit& hit = peptide_identifications[i].getHits()[l];
        fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++ << "|" << hit.getSequence().toString() << endl;
        String seq = hit.getSequence().toUnmodifiedString();
        // FASTA files should have at most 60 characters of sequence info per line
        for (Size j = 0; j < seq.size(); j += 60)
        {
          Size k = min(j + 60, seq.size());
          // was 'string(seq[j], seq[k])', which misused the string(count, char)
          // constructor; write the actual subsequence instead:
          fasta << seq.substr(j, k - j) << endl;
        }
      }
    }
  }
  else
  {
    writeLog_("Unsupported output file type given. Aborting!");
    printUsage_();
    return ILLEGAL_PARAMETERS;
  }
  logger.endProgress();

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) override
{
  ExitCodes return_code = TOPPMapAlignerBase::checkParameters_();
  if (return_code != EXECUTION_OK)
    return return_code;

  // set up alignment algorithm:
  MapAlignmentAlgorithmIdentification algorithm;
  Param algo_params = getParam_().copy("algorithm:", true);
  algorithm.setParameters(algo_params);
  algorithm.setLogType(log_type_);
  Int reference_index = getReference_(algorithm);

  // handle in- and output files:
  StringList input_files = getStringList_("in");
  StringList output_files = getStringList_("out");
  StringList trafo_files = getStringList_("trafo_out");
  FileTypes::Type in_type = FileHandler::getType(input_files[0]);
  vector<TransformationDescription> transformations;

  //-------------------------------------------------------------
  // perform feature alignment
  //-------------------------------------------------------------
  if (in_type == FileTypes::FEATUREXML)
  {
    vector<FeatureMap> feature_maps(input_files.size());
    FeatureXMLFile fxml_file;
    if (output_files.empty())
    {
      // store only transformation descriptions, not transformed data =>
      // we can load only minimum required information:
      fxml_file.getOptions().setLoadConvexHull(false);
      fxml_file.getOptions().setLoadSubordinates(false);
    }
    loadInitialMaps_(feature_maps, input_files, fxml_file);

    //-------------------------------------------------------------
    // extract (optional) fraction identifiers and associate with featureXMLs
    //-------------------------------------------------------------
    String design_file = getStringOption_("design");

    // determine map of fractions to runs
    map<unsigned, vector<String> > frac2files;
    // TODO: check if this can be put in a common helper function
    if (!design_file.empty())
    {
      // parse design file and determine fractions
      ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);
      // determine if design defines more than one fraction
      // (note: fraction and run IDs are one-based)
      frac2files = ed.getFractionToMSFilesMapping();
      // check if all fractions have the same number of MS runs associated
      if (!ed.sameNrOfMSFilesPerFraction())
      {
        writeLog_("Error: Number of runs must match for every fraction!");
        return ILLEGAL_PARAMETERS;
      }
    }
    else // no design file given
    {
      for (Size i = 0; i != input_files.size(); ++i)
      {
        // TODO: read proper MS file name from meta data
        frac2files[1].push_back("file" + String(i)); // associate each file with fraction 1
      }
    }

    // TODO: check and handle if featureXML order differs from run order

    // perform fraction-based alignment
    if (frac2files.size() == 1) // group one fraction
    {
      performAlignment_(algorithm, feature_maps, transformations, reference_index);
      applyTransformations_(feature_maps, transformations);
    }
    else // group multiple fractions
    {
      for (Size i = 1; i <= frac2files.size(); ++i)
      {
        vector<FeatureMap> fraction_maps;
        vector<TransformationDescription> fraction_transformations;
        size_t n_fractions = frac2files.size();
        // TODO FRACTIONS: determine map index based on annotated MS files (getPrimaryMSRuns())
        for (size_t feature_map_index = 0; feature_map_index != n_fractions; ++feature_map_index)
        {
          fraction_maps.push_back(feature_maps[feature_map_index]);
        }
        performAlignment_(algorithm, fraction_maps, fraction_transformations, reference_index);
        applyTransformations_(fraction_maps, fraction_transformations);

        // copy into transformations and feature maps
        transformations.insert(transformations.end(), fraction_transformations.begin(), fraction_transformations.end());
        Size f = 0;
        for (size_t feature_map_index = 0; feature_map_index != n_fractions; ++feature_map_index, ++f)
        {
          feature_maps[feature_map_index].swap(fraction_maps[f]);
        }
      }
    }

    if (!output_files.empty())
    {
      storeTransformedMaps_(feature_maps, output_files, fxml_file);
    }
  }
  //-------------------------------------------------------------
  // perform consensus alignment
  //-------------------------------------------------------------
  else if (in_type == FileTypes::CONSENSUSXML)
  {
    std::vector<ConsensusMap> consensus_maps(input_files.size());
    ConsensusXMLFile cxml_file;
    loadInitialMaps_(consensus_maps, input_files, cxml_file);
    performAlignment_(algorithm, consensus_maps, transformations, reference_index);
    applyTransformations_(consensus_maps, transformations);
    if (!output_files.empty())
    {
      storeTransformedMaps_(consensus_maps, output_files, cxml_file);
    }
  }
  //-------------------------------------------------------------
  // perform peptide alignment
  //-------------------------------------------------------------
  else if (in_type == FileTypes::IDXML)
  {
    vector<vector<ProteinIdentification> > protein_ids(input_files.size());
    vector<vector<PeptideIdentification> > peptide_ids(input_files.size());
    IdXMLFile idxml_file;
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);
    progresslogger.startProgress(0, input_files.size(), "loading input files");
    for (Size i = 0; i < input_files.size(); ++i)
    {
      progresslogger.setProgress(i);
      idxml_file.load(input_files[i], protein_ids[i], peptide_ids[i]);
    }
    progresslogger.endProgress();
    performAlignment_(algorithm, peptide_ids, transformations, reference_index);
    applyTransformations_(peptide_ids, transformations);
    if (!output_files.empty())
    {
      progresslogger.startProgress(0, output_files.size(), "writing output files");
      for (Size i = 0; i < output_files.size(); ++i)
      {
        progresslogger.setProgress(i);
        idxml_file.store(output_files[i], protein_ids[i], peptide_ids[i]);
      }
      progresslogger.endProgress();
    }
  }

  if (!trafo_files.empty())
  {
    storeTransformationDescriptions_(transformations, trafo_files);
  }

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**)
{
  ExitCodes ret = checkParameters_();
  if (ret != EXECUTION_OK)
    return ret;

  MapAlignmentAlgorithmSpectrumAlignment algorithm;
  Param algo_params = getParam_().copy("algorithm:", true);
  algorithm.setParameters(algo_params);
  algorithm.setLogType(log_type_);

  StringList ins = getStringList_("in");
  StringList outs = getStringList_("out");
  StringList trafos = getStringList_("trafo_out");
  Param model_params = getParam_().copy("model:", true);
  String model_type = model_params.getValue("type");
  model_params = model_params.copy(model_type + ":", true);

  std::vector<TransformationDescription> transformations;

  //-------------------------------------------------------------
  // perform peak alignment
  //-------------------------------------------------------------
  ProgressLogger progresslogger;
  progresslogger.setLogType(log_type_);

  // load input
  std::vector<MSExperiment<> > peak_maps(ins.size());
  MzMLFile f;
  f.setLogType(log_type_);
  progresslogger.startProgress(0, ins.size(), "loading input files");
  for (Size i = 0; i < ins.size(); ++i)
  {
    progresslogger.setProgress(i);
    f.load(ins[i], peak_maps[i]);
  }
  progresslogger.endProgress();

  // try to align
  algorithm.align(peak_maps, transformations);
  if (model_type != "none")
  {
    for (vector<TransformationDescription>::iterator it = transformations.begin(); it != transformations.end(); ++it)
    {
      it->fitModel(model_type, model_params);
    }
  }

  // write output
  progresslogger.startProgress(0, outs.size(), "applying RT transformations and writing output files");
  for (Size i = 0; i < outs.size(); ++i)
  {
    progresslogger.setProgress(i);
    MapAlignmentTransformer::transformRetentionTimes(peak_maps[i], transformations[i]);
    // annotate output with data processing info
    addDataProcessing_(peak_maps[i], getProcessingInfo_(DataProcessing::ALIGNMENT));
    f.store(outs[i], peak_maps[i]);
  }
  progresslogger.endProgress();

  if (!trafos.empty())
  {
    TransformationXMLFile trafo_file;
    for (Size i = 0; i < transformations.size(); ++i)
    {
      trafo_file.store(trafos[i], transformations[i]);
    }
  }

  return EXECUTION_OK;
}
void QTClusterFinder::run_(const vector<MapType>& input_maps, ConsensusMap& result_map)
{
  num_maps_ = input_maps.size();
  if (num_maps_ < 2)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                     "At least two input maps required");
  }

  // set up the distance functor (and set other parameters):
  DoubleReal max_intensity = input_maps[0].getMaxInt();
  DoubleReal max_mz = input_maps[0].getMax()[1];
  for (Size map_index = 1; map_index < num_maps_; ++map_index)
  {
    max_intensity = max(max_intensity, input_maps[map_index].getMaxInt());
    // use dimension 1 (m/z) here as well; this used 'getMax()[0]' (RT),
    // which was inconsistent with the initialization above:
    max_mz = max(max_mz, input_maps[map_index].getMax()[1]);
  }
  setParameters_(max_intensity, max_mz);

  // create the hash grid and fill it with features:
  //cout << "Hashing..." << endl;
  list<GridFeature> grid_features;
  Grid grid(Grid::ClusterCenter(max_diff_rt_, max_diff_mz_));
  for (Size map_index = 0; map_index < num_maps_; ++map_index)
  {
    for (Size feature_index = 0; feature_index < input_maps[map_index].size(); ++feature_index)
    {
      grid_features.push_back(GridFeature(input_maps[map_index][feature_index], map_index, feature_index));
      GridFeature& gfeature = grid_features.back();
      // sort peptide hits once now, instead of multiple times later:
      BaseFeature& feature = const_cast<BaseFeature&>(grid_features.back().getFeature());
      for (vector<PeptideIdentification>::iterator pep_it = feature.getPeptideIdentifications().begin();
           pep_it != feature.getPeptideIdentifications().end(); ++pep_it)
      {
        pep_it->sort();
      }
      grid.insert(std::make_pair(Grid::ClusterCenter(gfeature.getRT(), gfeature.getMZ()), &gfeature));
    }
  }

  // compute QT clustering:
  //cout << "Clustering..." << endl;
  list<QTCluster> clustering;
  computeClustering_(grid, clustering);
  // number of clusters == number of data points:
  Size size = clustering.size();

  // create a temporary map storing which GridFeatures are next to which clusters:
  OpenMSBoost::unordered_map<GridFeature*, std::vector<QTCluster*> > element_mapping;
  for (list<QTCluster>::iterator it = clustering.begin(); it != clustering.end(); ++it)
  {
    OpenMSBoost::unordered_map<Size, GridFeature*> elements;
    typedef std::multimap<DoubleReal, GridFeature*> InnerNeighborMap;
    typedef OpenMSBoost::unordered_map<Size, InnerNeighborMap> NeighborMap;
    NeighborMap neigh = it->getNeighbors();
    for (NeighborMap::iterator n_it = neigh.begin(); n_it != neigh.end(); ++n_it)
    {
      for (InnerNeighborMap::iterator i_it = n_it->second.begin(); i_it != n_it->second.end(); ++i_it)
      {
        element_mapping[i_it->second].push_back(&(*it));
      }
    }
  }

  ProgressLogger logger;
  logger.setLogType(ProgressLogger::CMD);
  logger.startProgress(0, size, "linking features");
  Size progress = 0;
  result_map.clear(false);
  while (!clustering.empty())
  {
    // cout << "Clusters: " << clustering.size() << endl;
    ConsensusFeature consensus_feature;
    makeConsensusFeature_(clustering, consensus_feature, element_mapping);
    if (!clustering.empty())
    {
      result_map.push_back(consensus_feature);
    }
    logger.setProgress(progress++);
  }
  logger.endProgress();
}