ExitCodes main_(int, const char **) { FeatureGroupingAlgorithmUnlabeled * algorithm = new FeatureGroupingAlgorithmUnlabeled(); //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- StringList ins; ins = getStringList_("in"); String out = getStringOption_("out"); //------------------------------------------------------------- // check for valid input //------------------------------------------------------------- // check if all input files have the correct type FileTypes::Type file_type = FileHandler::getType(ins[0]); for (Size i = 0; i < ins.size(); ++i) { if (FileHandler::getType(ins[i]) != file_type) { writeLog_("Error: All input files must be of the same type!"); return ILLEGAL_PARAMETERS; } } //------------------------------------------------------------- // set up algorithm //------------------------------------------------------------- Param algorithm_param = getParam_().copy("algorithm:", true); writeDebug_("Used algorithm parameters", algorithm_param, 3); algorithm->setParameters(algorithm_param); Size reference_index(0); //------------------------------------------------------------- // perform grouping //------------------------------------------------------------- // load input ConsensusMap out_map; StringList ms_run_locations; if (file_type == FileTypes::FEATUREXML) { // use map with highest number of features as reference: Size max_count(0); FeatureXMLFile f; for (Size i = 0; i < ins.size(); ++i) { Size s = f.loadSize(ins[i]); if (s > max_count) { max_count = s; reference_index = i; } } // Load reference map and input it to the algorithm UInt64 ref_id; Size ref_size; std::vector<PeptideIdentification> ref_pepids; std::vector<ProteinIdentification> ref_protids; { FeatureMap map_ref; FeatureXMLFile f_fxml_tmp; f_fxml_tmp.getOptions().setLoadConvexHull(false); f_fxml_tmp.getOptions().setLoadSubordinates(false); f_fxml_tmp.load(ins[reference_index], map_ref); algorithm->setReference(reference_index, map_ref); ref_id = map_ref.getUniqueId(); ref_size = map_ref.size(); ref_pepids = map_ref.getUnassignedPeptideIdentifications(); ref_protids = map_ref.getProteinIdentifications(); } ConsensusMap dummy; // go through all input files and add them to the result one by one for (Size i = 0; i < ins.size(); ++i) { FeatureXMLFile f_fxml_tmp; FeatureMap tmp_map; f_fxml_tmp.getOptions().setLoadConvexHull(false); f_fxml_tmp.getOptions().setLoadSubordinates(false); f_fxml_tmp.load(ins[i], tmp_map); // copy over information on the primary MS run StringList ms_runs; tmp_map.getPrimaryMSRunPath(ms_runs); ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end()); if (i != reference_index) { algorithm->addToGroup(i, tmp_map); // store some meta-data about the maps in the "dummy" object -> try to // keep the same order as they were given in the input independent of // which map is the reference. dummy.getFileDescriptions()[i].filename = ins[i]; dummy.getFileDescriptions()[i].size = tmp_map.size(); dummy.getFileDescriptions()[i].unique_id = tmp_map.getUniqueId(); // add protein identifications to result map dummy.getProteinIdentifications().insert( dummy.getProteinIdentifications().end(), tmp_map.getProteinIdentifications().begin(), tmp_map.getProteinIdentifications().end()); // add unassigned peptide identifications to result map dummy.getUnassignedPeptideIdentifications().insert( dummy.getUnassignedPeptideIdentifications().end(), tmp_map.getUnassignedPeptideIdentifications().begin(), tmp_map.getUnassignedPeptideIdentifications().end()); } else { // copy the meta-data from the refernce map dummy.getFileDescriptions()[i].filename = ins[i]; dummy.getFileDescriptions()[i].size = ref_size; dummy.getFileDescriptions()[i].unique_id = ref_id; // add protein identifications to result map dummy.getProteinIdentifications().insert( dummy.getProteinIdentifications().end(), ref_protids.begin(), ref_protids.end()); // add unassigned peptide identifications to result map dummy.getUnassignedPeptideIdentifications().insert( dummy.getUnassignedPeptideIdentifications().end(), ref_pepids.begin(), ref_pepids.end()); } } // get the resulting map out_map = algorithm->getResultMap(); // // Copy back meta-data (Protein / Peptide ids / File descriptions) // // add protein identifications to result map out_map.getProteinIdentifications().insert( out_map.getProteinIdentifications().end(), dummy.getProteinIdentifications().begin(), dummy.getProteinIdentifications().end()); // add unassigned peptide identifications to result map out_map.getUnassignedPeptideIdentifications().insert( out_map.getUnassignedPeptideIdentifications().end(), dummy.getUnassignedPeptideIdentifications().begin(), dummy.getUnassignedPeptideIdentifications().end()); out_map.setFileDescriptions(dummy.getFileDescriptions()); // canonical ordering for checking the results, and the ids have no real meaning anyway // the way this was done in DelaunayPairFinder and StablePairFinder // -> the same ordering as FeatureGroupingAlgorithmUnlabeled::group applies! out_map.sortByMZ(); out_map.updateRanges(); } else { vector<ConsensusMap> maps(ins.size()); ConsensusXMLFile f; for (Size i = 0; i < ins.size(); ++i) { f.load(ins[i], maps[i]); StringList ms_runs; maps[i].getPrimaryMSRunPath(ms_runs); ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end()); } // group algorithm->FeatureGroupingAlgorithm::group(maps, out_map); // set file descriptions: bool keep_subelements = getFlag_("keep_subelements"); if (!keep_subelements) { for (Size i = 0; i < ins.size(); ++i) { out_map.getFileDescriptions()[i].filename = ins[i]; out_map.getFileDescriptions()[i].size = maps[i].size(); out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId(); } } else { // components of the output map are not the input maps themselves, but // the components of the input maps: algorithm->transferSubelements(maps, out_map); } } // assign unique ids out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId); // annotate output with data processing info addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING)); out_map.setPrimaryMSRunPath(ms_run_locations); // write output ConsensusXMLFile().store(out, out_map); // some statistics map<Size, UInt> num_consfeat_of_size; for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit) { ++num_consfeat_of_size[cmit->size()]; } LOG_INFO << "Number of consensus features:" << endl; for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i) { LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl; } LOG_INFO << " total: " << setw(6) << out_map.size() << endl; delete algorithm; return EXECUTION_OK; }
ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm, bool labeled = false) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- StringList ins; if (labeled) ins.push_back(getStringOption_("in")); else ins = getStringList_("in"); String out = getStringOption_("out"); //------------------------------------------------------------- // check for valid input //------------------------------------------------------------- // check if all input files have the correct type FileTypes::Type file_type = FileHandler::getType(ins[0]); for (Size i = 0; i < ins.size(); ++i) { if (FileHandler::getType(ins[i]) != file_type) { writeLog_("Error: All input files must be of the same type!"); return ILLEGAL_PARAMETERS; } } //------------------------------------------------------------- // set up algorithm //------------------------------------------------------------- Param algorithm_param = getParam_().copy("algorithm:", true); writeDebug_("Used algorithm parameters", algorithm_param, 3); algorithm->setParameters(algorithm_param); //------------------------------------------------------------- // perform grouping //------------------------------------------------------------- // load input ConsensusMap out_map; StringList ms_run_locations; if (file_type == FileTypes::FEATUREXML) { vector<ConsensusMap > maps(ins.size()); FeatureXMLFile f; FeatureFileOptions param = f.getOptions(); // to save memory don't load convex hulls and subordinates param.setLoadSubordinates(false); param.setLoadConvexHull(false); f.setOptions(param); Size progress = 0; setLogType(ProgressLogger::CMD); startProgress(0, ins.size(), "reading input"); for (Size i = 0; i < ins.size(); ++i) { FeatureMap tmp; f.load(ins[i], tmp); out_map.getFileDescriptions()[i].filename = ins[i]; out_map.getFileDescriptions()[i].size = tmp.size(); out_map.getFileDescriptions()[i].unique_id = tmp.getUniqueId(); // copy over information on the primary MS run const StringList& ms_runs = tmp.getPrimaryMSRunPath(); ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end()); // to save memory, remove convex hulls, subordinates: for (FeatureMap::Iterator it = tmp.begin(); it != tmp.end(); ++it) { it->getSubordinates().clear(); it->getConvexHulls().clear(); it->clearMetaInfo(); } MapConversion::convert(i, tmp, maps[i]); maps[i].updateRanges(); setProgress(progress++); } endProgress(); // exception for "labeled" algorithms: copy file descriptions if (labeled) { out_map.getFileDescriptions()[1] = out_map.getFileDescriptions()[0]; out_map.getFileDescriptions()[0].label = "light"; out_map.getFileDescriptions()[1].label = "heavy"; } // group algorithm->group(maps, out_map); } else { vector<ConsensusMap> maps(ins.size()); ConsensusXMLFile f; for (Size i = 0; i < ins.size(); ++i) { f.load(ins[i], maps[i]); maps[i].updateRanges(); // copy over information on the primary MS run const StringList& ms_runs = maps[i].getPrimaryMSRunPath(); ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end()); } // group algorithm->group(maps, out_map); // set file descriptions: bool keep_subelements = getFlag_("keep_subelements"); if (!keep_subelements) { for (Size i = 0; i < ins.size(); ++i) { out_map.getFileDescriptions()[i].filename = ins[i]; out_map.getFileDescriptions()[i].size = maps[i].size(); out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId(); } } else { // components of the output map are not the input maps themselves, but // the components of the input maps: algorithm->transferSubelements(maps, out_map); } } // assign unique ids out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId); // annotate output with data processing info addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING)); // set primary MS runs out_map.setPrimaryMSRunPath(ms_run_locations); // write output ConsensusXMLFile().store(out, out_map); // some statistics map<Size, UInt> num_consfeat_of_size; for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit) { ++num_consfeat_of_size[cmit->size()]; } LOG_INFO << "Number of consensus features:" << endl; for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i) { LOG_INFO << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl; } LOG_INFO << " total: " << setw(6) << out_map.size() << endl; return EXECUTION_OK; }
ExitCodes outputTo(ostream& os) { //------------------------------------------------------------- // Parameter handling //------------------------------------------------------------- // File names String in = getStringOption_("in"); // File type FileHandler fh; FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type")); if (in_type == FileTypes::UNKNOWN) { in_type = fh.getType(in); writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2); } if (in_type == FileTypes::UNKNOWN) { writeLog_("Error: Could not determine input file type!"); return PARSE_ERROR; } MSExperiment<Peak1D> exp; FeatureMap feat; ConsensusMap cons; if (in_type == FileTypes::FEATUREXML) //features { FeatureXMLFile().load(in, feat); feat.updateRanges(); } else if (in_type == FileTypes::CONSENSUSXML) //consensus features { ConsensusXMLFile().load(in, cons); cons.updateRanges(); } //------------------------------------------------------------- // meta information //------------------------------------------------------------- if (getFlag_("m")) { os << endl << "-- General information --" << endl << endl << "file name: " << in << endl << "file type: " << FileTypes::typeToName(in_type) << endl; //basic info os << endl << "-- Meta information --" << endl << endl; if (in_type == FileTypes::FEATUREXML) //features { os << "Document id : " << feat.getIdentifier() << endl << endl; } else if (in_type == FileTypes::CONSENSUSXML) //consensus features { os << "Document id : " << cons.getIdentifier() << endl << endl; } } //------------------------------------------------------------- // data processing //------------------------------------------------------------- if (getFlag_("p")) { //basic info os << endl << "-- Data processing information --" << endl << endl; //get data processing info vector<DataProcessing> dp; if (in_type == FileTypes::FEATUREXML) //features { dp = feat.getDataProcessing(); } else if (in_type == FileTypes::CONSENSUSXML) //consensus features { dp = cons.getDataProcessing(); } int i = 0; for (vector<DataProcessing>::iterator it = dp.begin(); it != dp.end(); ++it) { os << "Data processing " << i << endl; os << "\tcompletion_time: " << (*it).getCompletionTime().getDate() << 'T' << (*it).getCompletionTime().getTime() << endl; os << "\tsoftware name: " << (*it).getSoftware().getName() << " version " << (*it).getSoftware().getVersion() << endl; for (set<DataProcessing::ProcessingAction>::const_iterator paIt = (*it).getProcessingActions().begin(); paIt != (*it).getProcessingActions().end(); ++paIt) { os << "\t\tprocessing action: " << DataProcessing::NamesOfProcessingAction[*paIt] << endl; } } ++i; } //------------------------------------------------------------- // statistics //------------------------------------------------------------- if (getFlag_("s")) { //------------------------------------------------------------- // Content statistics //------------------------------------------------------------- Map<String, int> meta_names; if (in_type == FileTypes::FEATUREXML) //features { os << "Number of features: " << feat.size() << endl << endl << "Ranges:" << endl << " retention time: " << String::number(feat.getMin()[Peak2D::RT], 2) << " : " << String::number(feat.getMax()[Peak2D::RT], 2) << endl << " mass-to-charge: " << String::number(feat.getMin()[Peak2D::MZ], 2) << " : " << String::number(feat.getMax()[Peak2D::MZ], 2) << endl << " intensity: " << String::number(feat.getMinInt(), 2) << " : " << String::number(feat.getMaxInt(), 2) << endl << endl; // Charge distribution Map<UInt, UInt> charges; for (Size i = 0; i < feat.size(); ++i) { charges[feat[i].getCharge()]++; } os << "Charge distribution" << endl; for (Map<UInt, UInt>::const_iterator it = charges.begin(); it != charges.end(); ++it) { os << "charge " << it->first << ": " << it->second << endl; } } else if (in_type == FileTypes::CONSENSUSXML) //consensus features { map<Size, UInt> num_consfeat_of_size; for (ConsensusMap::const_iterator cmit = cons.begin(); cmit != cons.end(); ++cmit) { ++num_consfeat_of_size[cmit->size()]; } os << endl << "Number of consensus features:" << endl; for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i) { os << " of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl; } os << " total: " << setw(6) << cons.size() << endl << endl; os << "Ranges:" << endl << " retention time: " << String::number(cons.getMin()[Peak2D::RT], 2) << " : " << String::number(cons.getMax()[Peak2D::RT], 2) << endl << " mass-to-charge: " << String::number(cons.getMin()[Peak2D::MZ], 2) << " : " << String::number(cons.getMax()[Peak2D::MZ], 2) << endl << " intensity: " << String::number(cons.getMinInt(), 2) << " : " << String::number(cons.getMaxInt(), 2) << endl; // file descriptions const ConsensusMap::FileDescriptions& descs = cons.getFileDescriptions(); if (!descs.empty()) { os << endl << "File descriptions:" << endl; for (ConsensusMap::FileDescriptions::const_iterator it = descs.begin(); it != descs.end(); ++it) { os << " - " << it->second.filename << endl << " identifier: " << it->first << endl << " label : " << it->second.label << endl << " size : " << it->second.size << endl; } } } os << endl << "-- Summary Statistics --" << endl << endl; } if (in_type == FileTypes::FEATUREXML) //features { feat.sortByRT(); vector<double> slice_stats; Size n = getIntOption_("n"); Size begin = 0; Size end = 0; os << "#slice\tRT_begin\tRT_end\tnumber_of_features\ttic\t" << "int_mean\tint_stddev\tint_min\tint_max\tint_median\tint_lowerq\tint_upperq\t" << "mz_mean\tmz_stddev\tmz_min\tmz_max\tmz_median\tmz_lowerq\tmz_upperq\t" << "width_mean\twidth_stddev\twidth_min\twidth_max\twidth_median\twidth_lowerq\twidth_upperq\t" << "qual_mean\tqual_stddev\tqual_min\tqual_max\tqual_median\tqual_lowerq\tqual_upperq\t" << "rt_qual_mean\trt_qual_stddev\trt_qual_min\trt_qual_max\trt_qual_median\trt_qual_lowerq\trt_qual_upperq\t" << "mz_qual_mean\tmz_qual_stddev\tmz_qual_min\tmz_qual_max\tmz_qual_median\tmz_qual_lowerq\tmz_qual_upperq" << endl; double rt_begin = 0.0; for (Size slice = 0; slice < n; ++slice) { // Determine slice boundaries. double rt_end = feat.back().getRT() / (double)n * (slice + 1); for (end = begin; end < feat.size() && feat[end].getRT() < rt_end; ++end) {} // Compute statistics on all features in this slice. slice_stats = sliceStatistics(feat, begin, end); // Write the beginning and end of the slices to the output as well as the slice index. os << slice << "\t" << rt_begin << "\t" << rt_end << "\t" << end - begin << "\t"; // Write the statistics as a line of an csv file copy(slice_stats.begin(), slice_stats.end(), ostream_iterator<double>(os, "\t")); os << endl; begin = end; rt_begin = rt_end; } } else if (in_type == FileTypes::CONSENSUSXML) //consensus features { Size size = cons.size(); vector<double> intensities; intensities.reserve(size); vector<double> qualities(size); qualities.reserve(size); vector<double> widths(size); widths.reserve(size); vector<double> rt_delta_by_elems; vector<double> rt_aad_by_elems; vector<double> rt_aad_by_cfs; rt_aad_by_cfs.reserve(size); vector<double> mz_delta_by_elems; vector<double> mz_aad_by_elems; vector<double> mz_aad_by_cfs; mz_aad_by_cfs.reserve(size); vector<double> it_delta_by_elems; vector<double> it_aad_by_elems; vector<double> it_aad_by_cfs; it_aad_by_cfs.reserve(size); for (ConsensusMap::const_iterator cm_iter = cons.begin(); cm_iter != cons.end(); ++cm_iter) { double rt_aad = 0; double mz_aad = 0; double it_aad = 0; intensities.push_back(cm_iter->getIntensity()); qualities.push_back(cm_iter->getQuality()); widths.push_back(cm_iter->getWidth()); for (ConsensusFeature::HandleSetType::const_iterator hs_iter = cm_iter->begin(); hs_iter != cm_iter->end(); ++hs_iter) { double rt_diff = hs_iter->getRT() - cm_iter->getRT(); rt_delta_by_elems.push_back(rt_diff); if (rt_diff < 0) { rt_diff = -rt_diff; } rt_aad_by_elems.push_back(rt_diff); rt_aad += rt_diff; double mz_diff = hs_iter->getMZ() - cm_iter->getMZ(); mz_delta_by_elems.push_back(mz_diff); if (mz_diff < 0) { mz_diff = -mz_diff; } mz_aad_by_elems.push_back(mz_diff); mz_aad += mz_diff; double it_ratio = hs_iter->getIntensity() / (cm_iter->getIntensity() ? cm_iter->getIntensity() : 1.); it_delta_by_elems.push_back(it_ratio); if (it_ratio < 1.) { it_ratio = 1. / it_ratio; } it_aad_by_elems.push_back(it_ratio); it_aad += it_ratio; } if (!cm_iter->empty()) { rt_aad /= cm_iter->size(); mz_aad /= cm_iter->size(); it_aad /= cm_iter->size(); } // otherwise rt_aad etc. are 0 anyway rt_aad_by_cfs.push_back(rt_aad); mz_aad_by_cfs.push_back(mz_aad); it_aad_by_cfs.push_back(it_aad); } OpenMS::SomeStatistics some_statistics; os.precision(writtenDigits(ConsensusFeature::IntensityType())); os << "Intensities of consensus features:" << endl << some_statistics(intensities) << endl; os.precision(writtenDigits(ConsensusFeature::QualityType())); os << "Qualities of consensus features:" << endl << some_statistics(qualities) << endl; os.precision(writtenDigits(ConsensusFeature::CoordinateType())); os << "Retention time differences ( element-center, weight 1 per element):" << endl << some_statistics(rt_delta_by_elems) << endl; os << "Absolute retention time differences ( |element-center|, weight 1 per element):" << endl << some_statistics(rt_aad_by_elems) << endl; os << "Average absolute differences of retention time within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(rt_aad_by_cfs) << endl; os.precision(writtenDigits(ConsensusFeature::CoordinateType())); os << "Mass-to-charge differences ( element-center, weight 1 per element):" << endl << some_statistics(mz_delta_by_elems) << endl; os << "Absolute differences of mass-to-charge ( |element-center|, weight 1 per element):" << endl << some_statistics(mz_aad_by_elems) << endl; os << "Average absolute differences of mass-to-charge within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(mz_aad_by_cfs) << endl; os.precision(writtenDigits(ConsensusFeature::IntensityType())); os << "Intensity ratios ( element/center, weight 1 per element):" << endl << some_statistics(it_delta_by_elems) << endl; os << "Relative intensity error ( max{(element/center),(center/element)}, weight 1 per element):" << endl << some_statistics(it_aad_by_elems) << endl; os << "Average relative intensity error within consensus features ( max{(element/center),(center/element)}, weight 1 per consensus features):" << endl << some_statistics(it_aad_by_cfs) << endl; } return EXECUTION_OK; }