void MapAlignmentTransformer::transformSingleConsensusMap(ConsensusMap & cmap, const TransformationDescription & trafo)
{
  for (ConsensusMap::Iterator cmit = cmap.begin(); cmit != cmap.end(); ++cmit)
  {
    applyToConsensusFeature_(*cmit, trafo);
  }

  // adapt RT values of unassigned peptides:
  if (!cmap.getUnassignedPeptideIdentifications().empty())
  {
    transformSinglePeptideIdentification(cmap.getUnassignedPeptideIdentifications(), trafo);
  }
}
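// Usage sketch (not part of the original source): driving the transformation above
// end-to-end for a consensus map, so that both consensus features and unassigned
// peptide IDs get their RT values adapted. File names are placeholders; the static
// call style and TransformationXMLFile are assumptions about the OpenMS version
// these snippets come from.
ConsensusMap cmap;
ConsensusXMLFile().load("input.consensusXML", cmap);

TransformationDescription trafo;
TransformationXMLFile().load("alignment.trafoXML", trafo);

MapAlignmentTransformer::transformSingleConsensusMap(cmap, trafo);

ConsensusXMLFile().store("aligned.consensusXML", cmap);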
template <typename MapType>
void FeatureGroupingAlgorithmQT::group_(const vector<MapType>& maps, ConsensusMap& out)
{
  // check that the number of maps is ok:
  if (maps.size() < 2)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
                                     "At least two maps must be given!");
  }

  QTClusterFinder cluster_finder;
  cluster_finder.setParameters(param_.copy("", true));
  cluster_finder.run(maps, out);

  StringList ms_run_locations;

  // add protein IDs and unassigned peptide IDs to the result map here,
  // to keep the same order as the input maps (useful for output later):
  for (typename vector<MapType>::const_iterator map_it = maps.begin();
       map_it != maps.end(); ++map_it)
  {
    // add protein identifications to result map:
    out.getProteinIdentifications().insert(
      out.getProteinIdentifications().end(),
      map_it->getProteinIdentifications().begin(),
      map_it->getProteinIdentifications().end());
    // add unassigned peptide identifications to result map:
    out.getUnassignedPeptideIdentifications().insert(
      out.getUnassignedPeptideIdentifications().end(),
      map_it->getUnassignedPeptideIdentifications().begin(),
      map_it->getUnassignedPeptideIdentifications().end());
  }

  // canonical ordering for checking the results:
  out.sortByQuality();
  out.sortByMaps();
  out.sortBySize();
  return;
}
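// Usage sketch (assumption, not part of the original source): the protected group_()
// above is reached through the public group() overloads of FeatureGroupingAlgorithmQT.
// File names are placeholders; filling the file descriptions mirrors what the TOPP
// linker tools in the other snippets do.
std::vector<FeatureMap> qt_maps(2);
FeatureXMLFile qt_fxml;
qt_fxml.load("run1.featureXML", qt_maps[0]);
qt_fxml.load("run2.featureXML", qt_maps[1]);

ConsensusMap qt_out;
qt_out.getFileDescriptions()[0].filename = "run1.featureXML";
qt_out.getFileDescriptions()[0].size = qt_maps[0].size();
qt_out.getFileDescriptions()[1].filename = "run2.featureXML";
qt_out.getFileDescriptions()[1].size = qt_maps[1].size();

FeatureGroupingAlgorithmQT qt_linker;
Param qt_params = qt_linker.getParameters(); // adjust distance tolerances here if needed
qt_linker.setParameters(qt_params);

qt_linker.group(qt_maps, qt_out); // protein and unassigned peptide IDs are appended per input map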
void MetaDataBrowser::add(ConsensusMap & map)
{
  //identifier
  add(static_cast<DocumentIdentifier &>(map));

  // protein identifications
  for (Size i = 0; i < map.getProteinIdentifications().size(); ++i)
  {
    add(map.getProteinIdentifications()[i]);
  }

  //unassigned peptide ids
  for (Size i = 0; i < map.getUnassignedPeptideIdentifications().size(); ++i)
  {
    add(map.getUnassignedPeptideIdentifications()[i]);
  }

  add(static_cast<MetaInfoInterface &>(map));

  treeview_->expandItem(treeview_->findItems(QString::number(0), Qt::MatchExactly, 1).first());
}
void IDMapper::annotate(ConsensusMap & map, const std::vector<PeptideIdentification> & ids,
                        const std::vector<ProteinIdentification> & protein_ids,
                        bool measure_from_subelements)
{
  // validate "RT" and "MZ" metavalues exist
  checkHits_(ids);

  //append protein identifications to Map
  map.getProteinIdentifications().insert(map.getProteinIdentifications().end(),
                                         protein_ids.begin(), protein_ids.end());

  //keep track of assigned/unassigned peptide identifications
  std::map<Size, Size> assigned;

  // store which peptides fit which feature (and avoid double entries)
  // consensusMap -> {peptide_index}
  std::vector<std::set<size_t> > mapping(map.size());

  DoubleList mz_values;
  DoubleReal rt_pep;
  IntList charges;

  //iterate over the peptide IDs
  for (Size i = 0; i < ids.size(); ++i)
  {
    if (ids[i].getHits().empty()) continue;

    getIDDetails_(ids[i], rt_pep, mz_values, charges);

    //iterate over the features
    for (Size cm_index = 0; cm_index < map.size(); ++cm_index)
    {
      // if set to TRUE, we leave the i_mz-loop as we added the whole ID with all hits
      bool was_added = false; // was current pep-m/z matched?!

      // iterate over m/z values of pepIds
      for (Size i_mz = 0; i_mz < mz_values.size(); ++i_mz)
      {
        DoubleReal mz_pep = mz_values[i_mz];

        // charge states to use for checking:
        IntList current_charges;
        if (!ignore_charge_)
        {
          // if "mz_ref." is "precursor", we have only one m/z value to check,
          // but still one charge state per peptide hit that could match:
          if (mz_values.size() == 1)
          {
            current_charges = charges;
          }
          else
          {
            current_charges.push_back(charges[i_mz]);
          }
          current_charges.push_back(0); // "not specified" always matches
        }

        //check if we compare distance from centroid or subelements
        if (!measure_from_subelements)
        {
          if (isMatch_(rt_pep - map[cm_index].getRT(), mz_pep, map[cm_index].getMZ())
             && (ignore_charge_ || ListUtils::contains(current_charges, map[cm_index].getCharge())))
          {
            was_added = true;
            map[cm_index].getPeptideIdentifications().push_back(ids[i]);
            ++assigned[i];
          }
        }
        else
        {
          for (ConsensusFeature::HandleSetType::const_iterator it_handle = map[cm_index].getFeatures().begin();
               it_handle != map[cm_index].getFeatures().end(); ++it_handle)
          {
            if (isMatch_(rt_pep - it_handle->getRT(), mz_pep, it_handle->getMZ())
               && (ignore_charge_ || ListUtils::contains(current_charges, it_handle->getCharge())))
            {
              was_added = true;
              if (mapping[cm_index].count(i) == 0)
              {
                map[cm_index].getPeptideIdentifications().push_back(ids[i]);
                ++assigned[i];
                mapping[cm_index].insert(i);
              }
              break; // we added this peptide already.. no need to check other handles
            }
          }
          // continue to here
        }

        if (was_added) break;

      } // m/z values to check

      // break to here

    } // features
  } // Identifications

  Size matches_none(0);
  Size matches_single(0);
  Size matches_multi(0);

  //append unassigned peptide identifications
  for (Size i = 0; i < ids.size(); ++i)
  {
    if (assigned[i] == 0)
    {
      map.getUnassignedPeptideIdentifications().push_back(ids[i]);
      ++matches_none;
    }
    else if (assigned[i] == 1)
    {
      ++matches_single;
    }
    else if (assigned[i] > 1)
    {
      ++matches_multi;
    }
  }

  //some statistics output
  LOG_INFO << "Unassigned peptides: " << matches_none << "\n"
           << "Peptides assigned to exactly one feature: " << matches_single << "\n"
           << "Peptides assigned to multiple features: " << matches_multi << std::endl;
}
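// Usage sketch (assumption, not part of the original source): typical driver code
// around IDMapper::annotate() above. Peptides that match no consensus feature end
// up in getUnassignedPeptideIdentifications(). File names are placeholders.
ConsensusMap id_cmap;
ConsensusXMLFile().load("features.consensusXML", id_cmap);

std::vector<ProteinIdentification> id_proteins;
std::vector<PeptideIdentification> id_peptides;
IdXMLFile().load("ids.idXML", id_proteins, id_peptides);

IDMapper mapper;
// tolerances are configured via the usual Param interface; specific parameter
// names (e.g. RT/m/z tolerances) are assumptions about this OpenMS version
Param mapper_params = mapper.getParameters();
mapper.setParameters(mapper_params);

mapper.annotate(id_cmap, id_peptides, id_proteins, true); // true: measure from subelements

ConsensusXMLFile().store("features_annotated.consensusXML", id_cmap);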
ExitCodes main_(int, const char **)
{
  FeatureGroupingAlgorithmUnlabeled * algorithm = new FeatureGroupingAlgorithmUnlabeled();

  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList ins;
  ins = getStringList_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // check for valid input
  //-------------------------------------------------------------
  // check if all input files have the correct type
  FileTypes::Type file_type = FileHandler::getType(ins[0]);
  for (Size i = 0; i < ins.size(); ++i)
  {
    if (FileHandler::getType(ins[i]) != file_type)
    {
      writeLog_("Error: All input files must be of the same type!");
      return ILLEGAL_PARAMETERS;
    }
  }

  //-------------------------------------------------------------
  // set up algorithm
  //-------------------------------------------------------------
  Param algorithm_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used algorithm parameters", algorithm_param, 3);
  algorithm->setParameters(algorithm_param);

  Size reference_index(0);

  //-------------------------------------------------------------
  // perform grouping
  //-------------------------------------------------------------
  // load input
  ConsensusMap out_map;
  StringList ms_run_locations;
  if (file_type == FileTypes::FEATUREXML)
  {
    // use map with highest number of features as reference:
    Size max_count(0);
    FeatureXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      Size s = f.loadSize(ins[i]);
      if (s > max_count)
      {
        max_count = s;
        reference_index = i;
      }
    }

    // Load reference map and input it to the algorithm
    UInt64 ref_id;
    Size ref_size;
    std::vector<PeptideIdentification> ref_pepids;
    std::vector<ProteinIdentification> ref_protids;
    {
      FeatureMap map_ref;
      FeatureXMLFile f_fxml_tmp;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[reference_index], map_ref);
      algorithm->setReference(reference_index, map_ref);
      ref_id = map_ref.getUniqueId();
      ref_size = map_ref.size();
      ref_pepids = map_ref.getUnassignedPeptideIdentifications();
      ref_protids = map_ref.getProteinIdentifications();
    }

    ConsensusMap dummy;
    // go through all input files and add them to the result one by one
    for (Size i = 0; i < ins.size(); ++i)
    {
      FeatureXMLFile f_fxml_tmp;
      FeatureMap tmp_map;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[i], tmp_map);

      // copy over information on the primary MS run
      StringList ms_runs;
      tmp_map.getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

      if (i != reference_index)
      {
        algorithm->addToGroup(i, tmp_map);

        // store some meta-data about the maps in the "dummy" object -> try to
        // keep the same order as they were given in the input independent of
        // which map is the reference.
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = tmp_map.size();
        dummy.getFileDescriptions()[i].unique_id = tmp_map.getUniqueId();

        // add protein identifications to result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          tmp_map.getProteinIdentifications().begin(),
          tmp_map.getProteinIdentifications().end());

        // add unassigned peptide identifications to result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          tmp_map.getUnassignedPeptideIdentifications().begin(),
          tmp_map.getUnassignedPeptideIdentifications().end());
      }
      else
      {
        // copy the meta-data from the reference map
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = ref_size;
        dummy.getFileDescriptions()[i].unique_id = ref_id;

        // add protein identifications to result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          ref_protids.begin(), ref_protids.end());

        // add unassigned peptide identifications to result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          ref_pepids.begin(), ref_pepids.end());
      }
    }

    // get the resulting map
    out_map = algorithm->getResultMap();

    //
    // Copy back meta-data (Protein / Peptide ids / File descriptions)
    //

    // add protein identifications to result map
    out_map.getProteinIdentifications().insert(
      out_map.getProteinIdentifications().end(),
      dummy.getProteinIdentifications().begin(),
      dummy.getProteinIdentifications().end());

    // add unassigned peptide identifications to result map
    out_map.getUnassignedPeptideIdentifications().insert(
      out_map.getUnassignedPeptideIdentifications().end(),
      dummy.getUnassignedPeptideIdentifications().begin(),
      dummy.getUnassignedPeptideIdentifications().end());

    out_map.setFileDescriptions(dummy.getFileDescriptions());

    // canonical ordering for checking the results, and the ids have no real meaning anyway
    // the way this was done in DelaunayPairFinder and StablePairFinder
    // -> the same ordering as FeatureGroupingAlgorithmUnlabeled::group applies!
    out_map.sortByMZ();
    out_map.updateRanges();
  }
  else
  {
    vector<ConsensusMap> maps(ins.size());
    ConsensusXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      f.load(ins[i], maps[i]);
      StringList ms_runs;
      maps[i].getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
    }
    // group
    algorithm->FeatureGroupingAlgorithm::group(maps, out_map);

    // set file descriptions:
    bool keep_subelements = getFlag_("keep_subelements");
    if (!keep_subelements)
    {
      for (Size i = 0; i < ins.size(); ++i)
      {
        out_map.getFileDescriptions()[i].filename = ins[i];
        out_map.getFileDescriptions()[i].size = maps[i].size();
        out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
      }
    }
    else
    {
      // components of the output map are not the input maps themselves, but
      // the components of the input maps:
      algorithm->transferSubelements(maps, out_map);
    }
  }

  // assign unique ids
  out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // annotate output with data processing info
  addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

  out_map.setPrimaryMSRunPath(ms_run_locations);

  // write output
  ConsensusXMLFile().store(out, out_map);

  // some statistics
  map<Size, UInt> num_consfeat_of_size;
  for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit)
  {
    ++num_consfeat_of_size[cmit->size()];
  }

  LOG_INFO << "Number of consensus features:" << endl;
  for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
       i != num_consfeat_of_size.rend(); ++i)
  {
    LOG_INFO << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
  }
  LOG_INFO << "  total: " << setw(6) << out_map.size() << endl;

  delete algorithm;

  return EXECUTION_OK;
}
void FeatureGroupingAlgorithmUnlabeled::group(const std::vector<FeatureMap> & maps, ConsensusMap & out)
{
  // check that the number of maps is ok
  if (maps.size() < 2)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                     "At least two maps must be given!");
  }

  // define reference map (the one with most peaks)
  Size reference_map_index = 0;
  Size max_count = 0;
  for (Size m = 0; m < maps.size(); ++m)
  {
    if (maps[m].size() > max_count)
    {
      max_count = maps[m].size();
      reference_map_index = m;
    }
  }

  std::vector<ConsensusMap> input(2);

  // build a consensus map of the elements of the reference map (contains only singleton consensus elements)
  MapConversion::convert(reference_map_index, maps[reference_map_index], input[0]);

  // loop over all other maps, extend the groups
  StablePairFinder pair_finder;
  pair_finder.setParameters(param_.copy("", true));

  for (Size i = 0; i < maps.size(); ++i)
  {
    if (i != reference_map_index)
    {
      MapConversion::convert(i, maps[i], input[1]);
      // compute the consensus of the reference map and map i
      ConsensusMap result;
      pair_finder.run(input, result);
      input[0].swap(result);
    }
  }

  // replace result with temporary map
  out.swap(input[0]);
  // copy back the input maps (they have been deleted while swapping)
  out.getFileDescriptions() = input[0].getFileDescriptions();

  // add protein IDs and unassigned peptide IDs to the result map here,
  // to keep the same order as the input maps (useful for output later)
  for (std::vector<FeatureMap>::const_iterator map_it = maps.begin();
       map_it != maps.end(); ++map_it)
  {
    // add protein identifications to result map
    out.getProteinIdentifications().insert(
      out.getProteinIdentifications().end(),
      map_it->getProteinIdentifications().begin(),
      map_it->getProteinIdentifications().end());

    // add unassigned peptide identifications to result map
    out.getUnassignedPeptideIdentifications().insert(
      out.getUnassignedPeptideIdentifications().end(),
      map_it->getUnassignedPeptideIdentifications().begin(),
      map_it->getUnassignedPeptideIdentifications().end());
  }

  // canonical ordering for checking the results, and the ids have no real meaning anyway
#if 1
  // the way this was done in DelaunayPairFinder and StablePairFinder
  out.sortByMZ();
#else
  out.sortByQuality();
  out.sortByMaps();
  out.sortBySize();
#endif
  return;
}
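// Usage sketch (assumption, not part of the original source): calling
// FeatureGroupingAlgorithmUnlabeled::group() directly. The file descriptions of
// 'out' are provided by the caller; group() swaps them back into the result, as
// the implementation above shows. File names are placeholders.
std::vector<FeatureMap> unlabeled_maps(2);
FeatureXMLFile unlabeled_fxml;
unlabeled_fxml.load("run1.featureXML", unlabeled_maps[0]);
unlabeled_fxml.load("run2.featureXML", unlabeled_maps[1]);

ConsensusMap unlabeled_out;
unlabeled_out.getFileDescriptions()[0].filename = "run1.featureXML";
unlabeled_out.getFileDescriptions()[0].size = unlabeled_maps[0].size();
unlabeled_out.getFileDescriptions()[1].filename = "run2.featureXML";
unlabeled_out.getFileDescriptions()[1].size = unlabeled_maps[1].size();

FeatureGroupingAlgorithmUnlabeled unlabeled_linker;
unlabeled_linker.group(unlabeled_maps, unlabeled_out); // IDs (including unassigned ones) are appended per input map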
void LabeledPairFinder::run(const vector<ConsensusMap>& input_maps, ConsensusMap& result_map)
{
  if (input_maps.size() != 1)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "exactly one input map required");
  if (result_map.getFileDescriptions().size() != 2)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "two file descriptions required");
  if (result_map.getFileDescriptions().begin()->second.filename != result_map.getFileDescriptions().rbegin()->second.filename)
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the two file descriptions have to contain the same file name");
  checkIds_(input_maps);

  //look up the light and heavy index
  Size light_index = numeric_limits<Size>::max();
  Size heavy_index = numeric_limits<Size>::max();
  for (ConsensusMap::FileDescriptions::const_iterator it = result_map.getFileDescriptions().begin();
       it != result_map.getFileDescriptions().end(); ++it)
  {
    if (it->second.label == "heavy")
    {
      heavy_index = it->first;
    }
    else if (it->second.label == "light")
    {
      light_index = it->first;
    }
  }
  if (light_index == numeric_limits<Size>::max() || heavy_index == numeric_limits<Size>::max())
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the input maps have to be labeled 'light' and 'heavy'");
  }

  result_map.clear(false);

  // sort consensus features by RT (and MZ) to speed up searching afterwards
  typedef ConstRefVector<ConsensusMap> RefMap;
  RefMap model_ref(input_maps[0].begin(), input_maps[0].end());
  model_ref.sortByPosition();

  //calculate matches
  ConsensusMap matches;

  //settings
  double rt_pair_dist = param_.getValue("rt_pair_dist");
  double rt_dev_low = param_.getValue("rt_dev_low");
  double rt_dev_high = param_.getValue("rt_dev_high");
  double mz_dev = param_.getValue("mz_dev");
  DoubleList mz_pair_dists = param_.getValue("mz_pair_dists");
  bool mrm = param_.getValue("mrm").toBool();

  //estimate RT parameters
  if (param_.getValue("rt_estimate") == "true")
  {
    //find all possible RT distances of features with the same charge and a good m/z distance
    vector<double> dists;
    dists.reserve(model_ref.size());
    for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
    {
      for (RefMap::const_iterator it2 = model_ref.begin(); it2 != model_ref.end(); ++it2)
      {
        for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
        {
          double mz_pair_dist = *dist_it;
          if (it2->getCharge() == it->getCharge()
             && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev
             && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev)
          {
            dists.push_back(it2->getRT() - it->getRT());
          }
        }
      }
    }
    if (dists.empty())
    {
      cout << "Warning: Could not find pairs for RT distance estimation. The manual settings are used!" << endl;
    }
    else
    {
      if (dists.size() < 50)
      {
        cout << "Warning: Found only " << dists.size() << " pairs. The estimated shift and std deviation are probably not reliable!" << endl;
      }
      //--------------------------- estimate initial parameters of fit ---------------------------
      GaussFitter::GaussFitResult result(-1, -1, -1);
      //first estimate of the optimal shift: median of the distances
      sort(dists.begin(), dists.end());
      Size median_index = dists.size() / 2;
      result.x0 = dists[median_index];
      //create histogram of distances
      //consider only the maximum of pairs, centered around the optimal shift
      Size max_pairs = model_ref.size() / 2;
      Size start_index = (Size) max((SignedSize)0, (SignedSize)(median_index - max_pairs / 2));
      Size end_index = (Size) min((SignedSize)(dists.size() - 1), (SignedSize)(median_index + max_pairs / 2));
      double start_value = dists[start_index];
      double end_value = dists[end_index];
      double bin_step = fabs(end_value - start_value) / 99.999; //ensure that we have 100 bins
      Math::Histogram<> hist(start_value, end_value, bin_step);
      //std::cout << "HIST from " << start_value << " to " << end_value << " (bin size " << bin_step << ")" << endl;
      for (Size i = start_index; i <= end_index; ++i)
      {
        hist.inc(dists[i]);
      }
      //cout << hist << endl;
      dists.clear();

      //determine median of bins (uniform background distribution)
      vector<Size> bins(hist.begin(), hist.end());
      sort(bins.begin(), bins.end());
      Size bin_median = bins[bins.size() / 2];
      bins.clear();

      //estimate scale A: maximum of the histogram
      Size max_value = hist.maxValue();
      result.A = max_value - bin_median;

      //overwrite estimate of x0 with the position of the highest bin
      for (Size i = 0; i < hist.size(); ++i)
      {
        if (hist[i] == max_value)
        {
          result.x0 = hist.centerOfBin(i);
          break;
        }
      }

      //estimate sigma: first time the count is less or equal the median count in the histogram
      double pos = result.x0;
      while (pos > start_value && hist.binValue(pos) > bin_median)
      {
        pos -= bin_step;
      }
      double sigma_low = result.x0 - pos;
      pos = result.x0;
      while (pos < end_value && hist.binValue(pos) > bin_median)
      {
        pos += bin_step;
      }
      double sigma_high = pos - result.x0;
      result.sigma = (sigma_high + sigma_low) / 6.0;
      //cout << "estimated optimal RT distance (before fit): " << result.x0 << endl;
      //cout << "estimated allowed deviation (before fit): " << result.sigma*3.0 << endl;

      //--------------------------- do gauss fit ---------------------------
      vector<DPosition<2> > points(hist.size());
      for (Size i = 0; i < hist.size(); ++i)
      {
        points[i][0] = hist.centerOfBin(i);
        points[i][1] = max(0u, hist[i]);
      }
      GaussFitter fitter;
      fitter.setInitialParameters(result);
      result = fitter.fit(points);
      cout << "estimated optimal RT distance: " << result.x0 << endl;
      cout << "estimated allowed deviation: " << fabs(result.sigma) * 3.0 << endl;
      rt_pair_dist = result.x0;
      rt_dev_low = fabs(result.sigma) * 3.0;
      rt_dev_high = fabs(result.sigma) * 3.0;
    }
  }

  // check each feature
  for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it)
  {
    for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it)
    {
      double mz_pair_dist = *dist_it;
      RefMap::const_iterator it2 = lower_bound(model_ref.begin(), model_ref.end(),
                                               it->getRT() + rt_pair_dist - rt_dev_low,
                                               ConsensusFeature::RTLess());
      while (it2 != model_ref.end() && it2->getRT() <= it->getRT() + rt_pair_dist + rt_dev_high)
      {
        // if in mrm mode, we need to compare precursor mass difference and fragment mass difference, charge remains the same
        double prec_mz_diff(0);
        if (mrm)
        {
          prec_mz_diff = fabs((double)it2->getMetaValue("MZ") - (double)it->getMetaValue("MZ"));
          if (it->getCharge() != 0)
          {
            prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist / it->getCharge());
          }
          else
          {
            prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist);
          }
        }

        bool mrm_correct_dist(false);
        double frag_mz_diff = fabs(it->getMZ() - it2->getMZ());
        //cerr << it->getRT() << " charge1=" << it->getCharge() << ", charge2=" << it2->getCharge() << ", prec_diff=" << prec_mz_diff << ", frag_diff=" << frag_mz_diff << endl;
        if (mrm && it2->getCharge() == it->getCharge() && prec_mz_diff < mz_dev
           && (frag_mz_diff < mz_dev || fabs(frag_mz_diff - mz_pair_dist) < mz_dev))
        {
          mrm_correct_dist = true;
          //cerr << "mrm_correct_dist" << endl;
        }

        if ((mrm && mrm_correct_dist)
           || (!mrm && it2->getCharge() == it->getCharge()
              && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev
              && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev))
        {
          //cerr << "dist correct" << endl;
          double score = sqrt(
            PValue_(it2->getMZ() - it->getMZ(), mz_pair_dist / it->getCharge(), mz_dev, mz_dev)
            * PValue_(it2->getRT() - it->getRT(), rt_pair_dist, rt_dev_low, rt_dev_high));

          // Note: we used to copy the id from the light feature here, but that strategy does not generalize to more than two labels.
          // We might want to report consensus features where the light one is missing but more than one heavier variant was found.
          // Also, the old strategy is inconsistent with what was done in the unlabeled case. Thus now we assign a new unique id here.
          matches.push_back(ConsensusFeature());
          matches.back().setUniqueId();

          matches.back().insert(light_index, *it);
          matches.back().clearMetaInfo();
          matches.back().insert(heavy_index, *it2);
          matches.back().setQuality(score);
          matches.back().setCharge(it->getCharge());
          matches.back().computeMonoisotopicConsensus();
        }
        ++it2;
      }
    }
  }

  //compute best pairs
  // - sort matches by quality
  // - take highest-quality matches first (greedy) and mark them as used
  set<Size> used_features;
  matches.sortByQuality(true);
  for (ConsensusMap::const_iterator match = matches.begin(); match != matches.end(); ++match)
  {
    //check if features are not used yet
    if (used_features.find(match->begin()->getUniqueId()) == used_features.end()
       && used_features.find(match->rbegin()->getUniqueId()) == used_features.end())
    {
      //if unused, add it to the final set of elements
      result_map.push_back(*match);
      used_features.insert(match->begin()->getUniqueId());
      used_features.insert(match->rbegin()->getUniqueId());
    }
  }

  //Add protein identifications to result map
  for (Size i = 0; i < input_maps.size(); ++i)
  {
    result_map.getProteinIdentifications().insert(result_map.getProteinIdentifications().end(),
                                                  input_maps[i].getProteinIdentifications().begin(),
                                                  input_maps[i].getProteinIdentifications().end());
  }

  //Add unassigned peptide identifications to result map
  for (Size i = 0; i < input_maps.size(); ++i)
  {
    result_map.getUnassignedPeptideIdentifications().insert(result_map.getUnassignedPeptideIdentifications().end(),
                                                            input_maps[i].getUnassignedPeptideIdentifications().begin(),
                                                            input_maps[i].getUnassignedPeptideIdentifications().end());
  }

  // Very useful for checking the results, and the ids have no real meaning anyway
  result_map.sortByMZ();
}
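// Usage sketch (assumption, not part of the original source): LabeledPairFinder::run()
// above expects exactly one input map and a result map whose two file descriptions
// share the same file name and carry the labels "light" and "heavy". The file name
// is a placeholder.
std::vector<ConsensusMap> labeled_input(1);
ConsensusXMLFile().load("labeled_run.consensusXML", labeled_input[0]);

ConsensusMap labeled_result;
labeled_result.getFileDescriptions()[0].filename = "labeled_run.consensusXML";
labeled_result.getFileDescriptions()[0].label = "light";
labeled_result.getFileDescriptions()[1].filename = "labeled_run.consensusXML";
labeled_result.getFileDescriptions()[1].label = "heavy";

LabeledPairFinder pair_finder;
pair_finder.run(labeled_input, labeled_result); // light/heavy pairs become two-element consensus features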
ExitCodes main_(int, const char **)
{
  String in = getStringOption_("in"), out = getStringOption_("out"),
         id_out = getStringOption_("id_out");

  if (out.empty() && id_out.empty())
  {
    throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, __PRETTY_FUNCTION__, "out/id_out");
  }

  vector<ProteinIdentification> proteins;
  vector<PeptideIdentification> peptides;

  FileTypes::Type in_type = FileHandler::getType(in);

  if (in_type == FileTypes::MZML)
  {
    MSExperiment<> experiment;
    MzMLFile().load(in, experiment);
    // what about unassigned peptide IDs?
    for (MSExperiment<>::Iterator exp_it = experiment.begin(); exp_it != experiment.end(); ++exp_it)
    {
      peptides.insert(peptides.end(),
                      exp_it->getPeptideIdentifications().begin(),
                      exp_it->getPeptideIdentifications().end());
      exp_it->getPeptideIdentifications().clear();
    }
    experiment.getProteinIdentifications().swap(proteins);
    if (!out.empty())
    {
      addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::FILTERING));
      MzMLFile().store(out, experiment);
    }
  }
  else if (in_type == FileTypes::FEATUREXML)
  {
    FeatureMap features;
    FeatureXMLFile().load(in, features);
    features.getUnassignedPeptideIdentifications().swap(peptides);
    for (FeatureMap::Iterator feat_it = features.begin(); feat_it != features.end(); ++feat_it)
    {
      peptides.insert(peptides.end(),
                      feat_it->getPeptideIdentifications().begin(),
                      feat_it->getPeptideIdentifications().end());
      feat_it->getPeptideIdentifications().clear();
    }
    features.getProteinIdentifications().swap(proteins);
    if (!out.empty())
    {
      addDataProcessing_(features, getProcessingInfo_(DataProcessing::FILTERING));
      FeatureXMLFile().store(out, features);
    }
  }
  else // consensusXML
  {
    ConsensusMap consensus;
    ConsensusXMLFile().load(in, consensus);
    consensus.getUnassignedPeptideIdentifications().swap(peptides);
    for (ConsensusMap::Iterator cons_it = consensus.begin(); cons_it != consensus.end(); ++cons_it)
    {
      peptides.insert(peptides.end(),
                      cons_it->getPeptideIdentifications().begin(),
                      cons_it->getPeptideIdentifications().end());
      cons_it->getPeptideIdentifications().clear();
    }
    consensus.getProteinIdentifications().swap(proteins);
    if (!out.empty())
    {
      addDataProcessing_(consensus, getProcessingInfo_(DataProcessing::FILTERING));
      ConsensusXMLFile().store(out, consensus);
    }
  }

  if (!id_out.empty())
  {
    // IDMapper can match a peptide ID to several overlapping features,
    // resulting in duplicates; this shouldn't be the case for peak data
    if (in_type != FileTypes::MZML) removeDuplicates_(peptides);
    IdXMLFile().store(id_out, proteins, peptides);
  }

  return EXECUTION_OK;
}
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[0].getSequence(), "ABCDEFG")
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[1].getSequence(), "HIJKLMN")
TEST_EQUAL(map.getProteinIdentifications()[1].getHits().size(), 1)
TEST_EQUAL(map.getProteinIdentifications()[1].getHits()[0].getSequence(), "OPQREST")
//peptide identifications
TEST_EQUAL(map[0].getPeptideIdentifications().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), "A")
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[0].getSequence(), "C")
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[1].getSequence(), "D")
TEST_EQUAL(map[1].getPeptideIdentifications().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits()[0].getSequence(), "E")
//unassigned peptide identifications
TEST_EQUAL(map.getUnassignedPeptideIdentifications().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits()[0].getSequence(), "F")
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[0].getSequence(), "G")
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[1].getSequence(), "H")
//features
TEST_EQUAL(map.size(), 6)
ConsensusFeature cons_feature = map[0];
TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27)
TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47)
TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47)
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[0].getSequence(), "ABCDEFG")
TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[1].getSequence(), "HIJKLMN")
TEST_EQUAL(map.getProteinIdentifications()[1].getHits().size(), 1)
TEST_EQUAL(map.getProteinIdentifications()[1].getHits()[0].getSequence(), "OPQREST")
//peptide identifications
TEST_EQUAL(map[0].getPeptideIdentifications().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("A"))
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[0].getSequence(), AASequence::fromString("C"))
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[1].getSequence(), AASequence::fromString("D"))
TEST_EQUAL(map[1].getPeptideIdentifications().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("E"))
//unassigned peptide identifications
TEST_EQUAL(map.getUnassignedPeptideIdentifications().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits()[0].getSequence(), AASequence::fromString("F"))
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[0].getSequence(), AASequence::fromString("G"))
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[1].getSequence(), AASequence::fromString("H"))
//features
TEST_EQUAL(map.size(), 6)
ConsensusFeature cons_feature = map[0];
TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27)
TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47)
TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47)