// Tool entry point. Depending on the detected input/output file types and the
// "convert_back" flag this
//   * converts sqMass -> mzML or mzML -> sqMass, or
//   * (default) loads the mzML input and writes a memory dump to "<out>.cached"
//     plus the meta data to "<out>", or
//   * (convert_back) loads the meta data from "<in>", reads the memory dump
//     "<in>.cached", appends the cached peaks to the meta data spectra and
//     chromatograms and stores the result to "<out>".
//
// Returns PARSE_ERROR if the input or output file type cannot be determined,
// INCOMPATIBLE_INPUT_DATA if meta data and memory dump do not contain the same
// number of spectra/chromatograms, and EXECUTION_OK otherwise.
ExitCodes main_(int, const char**)
{
  const String in = getStringOption_("in");
  const String out = getStringOption_("out");
  const String in_cached = in + ".cached";
  const String out_cached = out + ".cached";
  const bool convert_back = getFlag_("convert_back");

  FileHandler fh;

  // input file type: explicit option first, otherwise ask the file handler
  FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));
  if (in_type == FileTypes::UNKNOWN)
  {
    in_type = fh.getType(in);
    writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
  }
  if (in_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine input file type!");
    return PARSE_ERROR;
  }

  // output file type: explicit option first, otherwise derived from the file name
  FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
  if (out_type == FileTypes::UNKNOWN)
  {
    out_type = fh.getTypeByFileName(out);
  }
  if (out_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine output file type!");
    return PARSE_ERROR;
  }

  // direct conversions between sqMass and mzML
  if (in_type == FileTypes::SQMASS && out_type == FileTypes::MZML)
  {
    MapType exp;
    SqMassFile sqfile;
    MzMLFile f;
    sqfile.load(in, exp);
    f.store(out, exp);
    return EXECUTION_OK;
  }
  if (in_type == FileTypes::MZML && out_type == FileTypes::SQMASS)
  {
    MzMLFile f;
    SqMassFile sqfile;
    MapType exp;
    f.load(in, exp);
    sqfile.store(out, exp);
    return EXECUTION_OK;
  }

  if (!convert_back)
  {
    // mzML -> memory dump + meta data
    MapType exp;
    CachedmzML cacher;
    MzMLFile f;
    cacher.setLogType(log_type_);
    f.setLogType(log_type_);

    f.load(in, exp);
    cacher.writeMemdump(exp, out_cached);
    cacher.writeMetadata(exp, out, true);
    return EXECUTION_OK;
  }

  // memory dump + meta data -> mzML
  MzMLFile f;
  MapType meta_exp;
  CachedmzML cacher;
  MapType exp_reading;
  cacher.setLogType(log_type_);
  f.setLogType(log_type_);

  f.load(in, meta_exp);
  cacher.readMemdump(exp_reading, in_cached);

  std::cout << " read back, got " << exp_reading.size() << " spectra " << exp_reading.getChromatograms().size() << " chromats " << std::endl;

  const std::vector<MSChromatogram<ChromatogramPeak> >& cached_chromatograms = exp_reading.getChromatograms();

  // The merge below indexes both experiments in parallel, so differing sizes
  // would access elements out of range. Abort instead of only warning.
  if (meta_exp.size() != exp_reading.size() ||
      meta_exp.getChromatograms().size() != cached_chromatograms.size())
  {
    std::cerr << " Both experiments need to have the same size! (meta data: "
              << meta_exp.size() << " spectra / " << meta_exp.getChromatograms().size()
              << " chromatograms, cached data: "
              << exp_reading.size() << " spectra / " << cached_chromatograms.size()
              << " chromatograms)" << std::endl;
    return INCOMPATIBLE_INPUT_DATA;
  }

  // remove the "cached_data" marker from all data processing entries
  for (Size i = 0; i < meta_exp.size(); ++i)
  {
    for (Size j = 0; j < meta_exp[i].getDataProcessing().size(); j++)
    {
      if (meta_exp[i].getDataProcessing()[j]->metaValueExists("cached_data"))
      {
        meta_exp[i].getDataProcessing()[j]->removeMetaValue("cached_data");
      }
    }
  }
  for (Size i = 0; i < meta_exp.getNrChromatograms(); ++i)
  {
    for (Size j = 0; j < meta_exp.getChromatogram(i).getDataProcessing().size(); j++)
    {
      if (meta_exp.getChromatogram(i).getDataProcessing()[j]->metaValueExists("cached_data"))
      {
        meta_exp.getChromatogram(i).getDataProcessing()[j]->removeMetaValue("cached_data");
      }
    }
  }

  // append the cached peaks to the meta data spectra
  for (Size i = 0; i < exp_reading.size(); ++i)
  {
    for (Size j = 0; j < exp_reading[i].size(); j++)
    {
      meta_exp[i].push_back(exp_reading[i][j]);
    }
  }

  // append the cached peaks to a copy of the meta data chromatograms and put it back
  std::vector<MSChromatogram<ChromatogramPeak> > merged_chromatograms = meta_exp.getChromatograms();
  for (Size i = 0; i < cached_chromatograms.size(); ++i)
  {
    for (Size j = 0; j < cached_chromatograms[i].size(); j++)
    {
      merged_chromatograms[i].push_back(cached_chromatograms[i][j]);
    }
  }
  meta_exp.setChromatograms(merged_chromatograms);

  f.store(out, meta_exp);
  return EXECUTION_OK;
}
ExitCodes main_(int , const char**) { String in = getStringOption_("in"); String out_meta = getStringOption_("out"); String in_cached = in + ".cached"; String out_cached = out_meta + ".cached"; bool convert_back = getFlag_("convert_back"); if (!convert_back) { MapType exp; CachedmzML cacher; MzMLFile f; cacher.setLogType(log_type_); f.setLogType(log_type_); f.load(in,exp); cacher.writeMemdump(exp, out_cached); DataProcessing dp; std::set<DataProcessing::ProcessingAction> actions; actions.insert(DataProcessing::FORMAT_CONVERSION); dp.setProcessingActions(actions); dp.setMetaValue("cached_data", "true"); for (Size i=0; i<exp.size(); ++i) { exp[i].getDataProcessing().push_back(dp); } std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = exp.getChromatograms(); for (Size i=0; i<chromatograms.size(); ++i) { chromatograms[i].getDataProcessing().push_back(dp); } exp.setChromatograms(chromatograms); cacher.writeMetadata(exp, out_meta); } else { MzMLFile f; MapType meta_exp; CachedmzML cacher; MapType exp_reading; cacher.setLogType(log_type_); f.setLogType(log_type_); f.load(in,meta_exp); cacher.readMemdump(exp_reading, in_cached); std::cout << " read back, got " << exp_reading.size() << " spectra " << exp_reading.getChromatograms().size() << " chromats " << std::endl; { for (Size i=0; i<meta_exp.size(); ++i) { for (Size j = 0; j < meta_exp[i].getDataProcessing().size(); j++) { DataProcessing& dp = meta_exp[i].getDataProcessing()[j]; if (dp.metaValueExists("cached_data")) { dp.removeMetaValue("cached_data"); } } } std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = meta_exp.getChromatograms(); for (Size i=0; i<chromatograms.size(); ++i) { for (Size j = 0; j < chromatograms[i].getDataProcessing().size(); j++) { DataProcessing& dp = chromatograms[i].getDataProcessing()[j]; if (dp.metaValueExists("cached_data")) { dp.removeMetaValue("cached_data"); } } } } if (meta_exp.size() != exp_reading.size()) { std::cerr << " Both experiments need to have the same 
size!"; } for (Size i=0; i<exp_reading.size(); ++i) { for (Size j = 0; j < exp_reading[i].size(); j++) { meta_exp[i].push_back(exp_reading[i][j]); } } std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = exp_reading.getChromatograms(); std::vector<MSChromatogram<ChromatogramPeak> > old_chromatograms = meta_exp.getChromatograms(); for (Size i=0; i<chromatograms.size(); ++i) { for (Size j = 0; j < chromatograms[i].size(); j++) { old_chromatograms[i].push_back(chromatograms[i][j]); } } meta_exp.setChromatograms(old_chromatograms); f.store(out_meta,meta_exp); } return EXECUTION_OK; }
// Tool entry point: reads the input file with the strategy selected by the
// "read_method" option ("streaming", "regular", "indexed", "indexed_parallel",
// "cached" or "cached_parallel") and prints the number of spectra, the number
// of peaks, the summed intensity (total ion current) and the process memory
// consumption to stdout.
//
// The "loadData" option controls whether peak data is actually loaded for the
// streaming, regular and indexed methods.
//
// Returns ILLEGAL_PARAMETERS if the read method is unknown or if the input
// name of a cached method does not contain ".cachedMzML" exactly once as its
// ending; EXECUTION_OK otherwise.
ExitCodes main_(int, const char**) override
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String in = getStringOption_("in");
  String read_method = getStringOption_("read_method");
  const bool load_data = getStringOption_("loadData") == "true";

  // Prints the current process memory consumption. It is called inside each
  // branch so the loaded data is still alive when the value is taken.
  const auto report_memory = []()
  {
    size_t after = 0; // stays 0 if the query does not fill it in
    SysInfo::getProcessMemoryConsumption(after);
    std::cout << " Memory consumption after " << after << std::endl;
  };

  // Prints the three-line summary shared by all non-streaming read methods.
  const auto report_totals = [&report_memory](Size nr_spectra, long int nr_peaks, double tic)
  {
    std::cout << "There are " << nr_spectra << " spectra and " << nr_peaks << " peaks in the input file." << std::endl;
    std::cout << "The total ion current is " << tic << std::endl;
    report_memory();
  };

  if (read_method == "streaming")
  {
    std::cout << "Read method: streaming" << std::endl;

    // Create the consumer and stream the file through it
    TICConsumer consumer;
    MzMLFile mzml;
    mzml.setLogType(log_type_);

    PeakFileOptions opt = mzml.getOptions();
    opt.setFillData(load_data); // whether to actually load any data
    opt.setSkipXMLChecks(true); // save time by not checking base64 strings for whitespaces
    opt.setMaxDataPoolSize(100);
    opt.setAlwaysAppendData(false);
    mzml.setOptions(opt);
    mzml.transform(in, &consumer, true, true);

    std::cout << "There are " << consumer.nr_spectra << " spectra and " << consumer.nr_peaks << " peaks in the input file." << std::endl;
    std::cout << "The total ion current is " << consumer.TIC << std::endl;
    report_memory();
  }
  else if (read_method == "regular")
  {
    std::cout << "Read method: regular" << std::endl;

    MzMLFile mzml;
    mzml.setLogType(log_type_);

    PeakFileOptions opt = mzml.getOptions();
    opt.setFillData(load_data); // whether to actually load any data
    opt.setSkipXMLChecks(true); // save time by not checking base64 strings for whitespaces
    mzml.setOptions(opt);

    PeakMap map;
    mzml.load(in, map);

    double TIC = 0.0;
    long int nr_peaks = 0;
    for (Size i = 0; i < map.size(); i++)
    {
      nr_peaks += map[i].size();
      for (Size j = 0; j < map[i].size(); j++)
      {
        TIC += map[i][j].getIntensity();
      }
    }

    report_totals(map.size(), nr_peaks, TIC);
  }
  else if (read_method == "indexed")
  {
    std::cout << "Read method: indexed" << std::endl;

    // load data from an indexed MzML file
    IndexedMzMLFileLoader imzml;
    OnDiscPeakMap map;
    imzml.load(in, map);

    double TIC = 0.0;
    long int nr_peaks = 0;
    if (load_data)
    {
      for (Size i = 0; i < map.getNrSpectra(); i++)
      {
        OpenMS::Interfaces::SpectrumPtr sptr = map.getSpectrumById(i);
        nr_peaks += sptr->getIntensityArray()->data.size();
        TIC += std::accumulate(sptr->getIntensityArray()->data.begin(), sptr->getIntensityArray()->data.end(), 0.0);
      }
    }

    report_totals(map.getNrSpectra(), nr_peaks, TIC);
  }
  else if (read_method == "indexed_parallel")
  {
    std::cout << "Read method: indexed (parallel)" << std::endl;

    // open the indexed MzML file directly on the on-disc map
    OnDiscPeakMap map;
    map.openFile(in, true);
    map.setSkipXMLChecks(true);

    double TIC = 0.0;
    long int nr_peaks = 0;
    if (load_data)
    {
      // firstprivate means that each thread has its own instance of the
      // variable, each copy initialized with the initial value
#ifdef _OPENMP
#pragma omp parallel for firstprivate(map)
#endif
      for (SignedSize i = 0; i < static_cast<SignedSize>(map.getNrSpectra()); i++)
      {
        OpenMS::Interfaces::SpectrumPtr sptr = map.getSpectrumById(i);
        const long int nr_peaks_l = static_cast<long int>(sptr->getIntensityArray()->data.size());
        const double TIC_l = std::accumulate(sptr->getIntensityArray()->data.begin(), sptr->getIntensityArray()->data.end(), 0.0);
        // only the update of the shared totals is serialized
#ifdef _OPENMP
#pragma omp critical (indexed)
#endif
        {
          TIC += TIC_l;
          nr_peaks += nr_peaks_l;
        }
      }
    }

    report_totals(map.getNrSpectra(), nr_peaks, TIC);
  }
  else if (read_method == "cached")
  {
    std::cout << "Read method: cached" << std::endl;

    // The input is expected to be named "<base>.cachedMzML"
    std::vector<String> split_out;
    in.split(".cachedMzML", split_out);
    if (split_out.size() != 2)
    {
      LOG_ERROR << "Cannot deduce base path from input '" << in
                << "' (note that '.cachedMzML' should only occur once as the final ending)" << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    CachedmzML cache;
    cache.createMemdumpIndex(in);
    const std::vector<std::streampos> spectra_index = cache.getSpectraIndex();

    std::ifstream ifs_;
    ifs_.open(in.c_str(), std::ios::binary);

    double TIC = 0.0;
    long int nr_peaks = 0;
    for (Size i = 0; i < spectra_index.size(); ++i)
    {
      BinaryDataArrayPtr mz_array(new BinaryDataArray);
      BinaryDataArrayPtr intensity_array(new BinaryDataArray);
      int ms_level = -1;
      double rt = -1.0;
      ifs_.seekg(spectra_index[i]);
      CachedmzML::readSpectrumFast(mz_array, intensity_array, ifs_, ms_level, rt);

      nr_peaks += intensity_array->data.size();
      for (Size j = 0; j < intensity_array->data.size(); j++)
      {
        TIC += intensity_array->data[j];
      }
    }

    report_totals(spectra_index.size(), nr_peaks, TIC);
  }
  else if (read_method == "cached_parallel")
  {
    std::cout << "Read method: cached parallel" << std::endl;

    // The input is expected to be named "<base>.cachedMzML"
    std::vector<String> split_out;
    in.split(".cachedMzML", split_out);
    if (split_out.size() != 2)
    {
      LOG_ERROR << "Cannot deduce base path from input '" << in
                << "' (note that '.cachedMzML' should only occur once as the final ending)" << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    CachedmzML cache;
    cache.createMemdumpIndex(in);
    const std::vector<std::streampos> spectra_index = cache.getSpectraIndex();

    FileAbstraction filestream(in);

    double TIC = 0.0;
    long int nr_peaks = 0;
#ifdef _OPENMP
#pragma omp parallel for firstprivate(filestream)
#endif
    for (SignedSize i = 0; i < static_cast<SignedSize>(spectra_index.size()); ++i)
    {
      BinaryDataArrayPtr mz_array(new BinaryDataArray);
      BinaryDataArrayPtr intensity_array(new BinaryDataArray);
      int ms_level = -1;
      double rt = -1.0;
      // we only change the position of the thread-local filestream
      filestream.getStream().seekg(spectra_index[i]);
      CachedmzML::readSpectrumFast(mz_array, intensity_array, filestream.getStream(), ms_level, rt);

      const long int nr_peaks_l = static_cast<long int>(intensity_array->data.size());
      const double TIC_l = std::accumulate(intensity_array->data.begin(), intensity_array->data.end(), 0.0);
      // only the update of the shared totals is serialized
#ifdef _OPENMP
#pragma omp critical (indexed)
#endif
      {
        TIC += TIC_l;
        nr_peaks += nr_peaks_l;
      }
    }

    report_totals(spectra_index.size(), nr_peaks, TIC);
  }
  else
  {
    // previously an unrecognised method did nothing and still reported success
    LOG_ERROR << "Unknown read method '" << read_method << "'" << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  return EXECUTION_OK;
}