void QuantitativeExperimentalDesign::applyDesign2Quantifier(PeptideAndProteinQuant & quantifier, TextFile & file, StringList & file_paths) { // vector< pair<PeptideAndProteinQuant::PeptideData,PeptideAndProteinQuant::ProteinQuant> >& result) //create mapping from experimental setting to all respective file names map<String, StringList> design2FileBaseName; mapFiles2Design_(design2FileBaseName, file); //filter out all non-existing files map<String, StringList> design2FilePath; findRelevantFilePaths_(design2FileBaseName, design2FilePath, file_paths); //determine wether we deal with idXML or featureXML FileTypes::Type in_type = FileHandler::getType(file_paths.front()); if (in_type == FileTypes::FEATUREXML) { FeatureMap<> features; for (map<String, StringList>::iterator iter = design2FilePath.begin(); iter != design2FilePath.end(); ++iter) { mergeFeatureMaps_(features, iter->first, iter->second); } LOG_INFO << "Number of proteinIdentifications: " << features.getProteinIdentifications().size() << endl; ProteinIdentification & proteins = features.getProteinIdentifications()[0]; quantifier.quantifyPeptides(features); quantifier.quantifyProteins(proteins); } else { ConsensusMap consensus; for (map<String, StringList>::iterator iter = design2FilePath.begin(); iter != design2FilePath.end(); ++iter) { mergeConsensusMaps_(consensus, iter->first, iter->second); } LOG_INFO << "Number of proteinIdentifications: " << consensus.getProteinIdentifications().size() << endl; ProteinIdentification & proteins = consensus.getProteinIdentifications()[0]; quantifier.quantifyPeptides(consensus); quantifier.quantifyProteins(proteins); } }
void FeatureGroupingAlgorithmQT::group_(const vector<MapType>& maps, ConsensusMap& out) { // check that the number of maps is ok: if (maps.size() < 2) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "At least two maps must be given!"); } QTClusterFinder cluster_finder; cluster_finder.setParameters(param_.copy("", true)); cluster_finder.run(maps, out); StringList ms_run_locations; // add protein IDs and unassigned peptide IDs to the result map here, // to keep the same order as the input maps (useful for output later): for (typename vector<MapType>::const_iterator map_it = maps.begin(); map_it != maps.end(); ++map_it) { // add protein identifications to result map: out.getProteinIdentifications().insert( out.getProteinIdentifications().end(), map_it->getProteinIdentifications().begin(), map_it->getProteinIdentifications().end()); // add unassigned peptide identifications to result map: out.getUnassignedPeptideIdentifications().insert( out.getUnassignedPeptideIdentifications().end(), map_it->getUnassignedPeptideIdentifications().begin(), map_it->getUnassignedPeptideIdentifications().end()); } // canonical ordering for checking the results: out.sortByQuality(); out.sortByMaps(); out.sortBySize(); return; }
// Adds the meta data of a consensus map to the browser: its document
// identifier, every protein identification, every unassigned peptide
// identification and its meta info. Afterwards the first tree item whose
// column 1 text is "0" gets expanded.
void MetaDataBrowser::add(ConsensusMap & map)
{
  // document identifier
  add(static_cast<DocumentIdentifier &>(map));

  // protein identifications
  for (Size prot_index = 0; prot_index < map.getProteinIdentifications().size(); ++prot_index)
  {
    add(map.getProteinIdentifications()[prot_index]);
  }

  // unassigned peptide identifications
  for (Size pep_index = 0; pep_index < map.getUnassignedPeptideIdentifications().size(); ++pep_index)
  {
    add(map.getUnassignedPeptideIdentifications()[pep_index]);
  }

  // meta info of the map itself
  add(static_cast<MetaInfoInterface &>(map));

  // expand the first exact match for "0" in column 1
  const QString first_item_id = QString::number(0);
  treeview_->expandItem(treeview_->findItems(first_item_id, Qt::MatchExactly, 1).first());
}
// Runs protein inference separately for every ProteinIdentification
// (identification run) stored in @p consensus_map. Runs are not combined here;
// callers that want a combined inference have to merge them beforehand.
//
// @param consensus_map  Map whose protein identifications are processed in order.
// @param reference_map  Map index that is forwarded to infer_ as reference.
void ProteinInference::infer(ConsensusMap & consensus_map, const UInt reference_map)
{
  size_t run_index = 0;
  while (run_index < consensus_map.getProteinIdentifications().size())
  {
    infer_(consensus_map, run_index, reference_map);
    ++run_index;
  }
}
// Annotates the consensus features of @p map with matching peptide
// identifications.
//
// @param map                       Consensus map to annotate. It receives all
//                                  @p protein_ids, the matching peptide IDs per
//                                  consensus feature, and the peptide IDs that
//                                  matched nothing as "unassigned".
// @param ids                       Peptide identifications to map (validated by checkHits_).
// @param protein_ids               Protein identifications appended to the map.
// @param measure_from_subelements  If false, RT and m/z of the consensus feature
//                                  itself are compared; if true, those of its
//                                  feature handles.
void IDMapper::annotate(ConsensusMap & map, const std::vector<PeptideIdentification> & ids, const std::vector<ProteinIdentification> & protein_ids, bool measure_from_subelements)
{
  // validate "RT" and "MZ" metavalues exist
  checkHits_(ids);

  // append protein identifications to the map
  map.getProteinIdentifications().insert(map.getProteinIdentifications().end(), protein_ids.begin(), protein_ids.end());

  // peptide index -> number of consensus features it was assigned to
  std::map<Size, Size> assigned;

  // consensus feature index -> {peptide indices already attached}; avoids double entries
  std::vector<std::set<size_t> > mapping(map.size());

  DoubleList mz_values;
  DoubleReal rt_pep;
  IntList charges;

  for (Size id_index = 0; id_index < ids.size(); ++id_index)
  {
    const PeptideIdentification & pep_id = ids[id_index];
    if (pep_id.getHits().empty())
    {
      continue; // nothing to match; ends up in the "unassigned" list below
    }

    getIDDetails_(pep_id, rt_pep, mz_values, charges);

    for (Size cm_index = 0; cm_index < map.size(); ++cm_index)
    {
      ConsensusFeature & cons_feature = map[cm_index];

      // becomes true once the ID (with all its hits) was matched to this
      // feature; the remaining m/z values are skipped then
      bool was_added = false;

      for (Size i_mz = 0; i_mz < mz_values.size() && !was_added; ++i_mz)
      {
        const DoubleReal mz_pep = mz_values[i_mz];

        // charge states to use for checking:
        IntList current_charges;
        if (!ignore_charge_)
        {
          // if "mz_ref." is "precursor", we have only one m/z value to check,
          // but still one charge state per peptide hit that could match:
          if (mz_values.size() == 1)
          {
            current_charges = charges;
          }
          else
          {
            current_charges.push_back(charges[i_mz]);
          }
          current_charges.push_back(0); // "not specified" always matches
        }

        if (measure_from_subelements)
        {
          // compare against the individual feature handles
          for (ConsensusFeature::HandleSetType::const_iterator it_handle = cons_feature.getFeatures().begin();
               it_handle != cons_feature.getFeatures().end();
               ++it_handle)
          {
            const bool handle_matches =
              isMatch_(rt_pep - it_handle->getRT(), mz_pep, it_handle->getMZ()) &&
              (ignore_charge_ || ListUtils::contains(current_charges, it_handle->getCharge()));
            if (!handle_matches)
            {
              continue;
            }

            was_added = true;
            // attach the ID only once per consensus feature
            if (mapping[cm_index].insert(id_index).second)
            {
              cons_feature.getPeptideIdentifications().push_back(pep_id);
              ++assigned[id_index];
            }
            break; // we added this peptide already.. no need to check other handles
          }
        }
        else
        {
          // compare against the consensus feature position itself
          const bool centroid_matches =
            isMatch_(rt_pep - cons_feature.getRT(), mz_pep, cons_feature.getMZ()) &&
            (ignore_charge_ || ListUtils::contains(current_charges, cons_feature.getCharge()));
          if (centroid_matches)
          {
            was_added = true;
            cons_feature.getPeptideIdentifications().push_back(pep_id);
            ++assigned[id_index];
          }
        }
      } // m/z values to check
    } // features
  } // identifications

  Size matches_none(0);
  Size matches_single(0);
  Size matches_multi(0);

  // append unassigned peptide identifications and gather statistics
  for (Size id_index = 0; id_index < ids.size(); ++id_index)
  {
    const Size feature_count = assigned[id_index];
    if (feature_count == 0)
    {
      map.getUnassignedPeptideIdentifications().push_back(ids[id_index]);
      ++matches_none;
    }
    else if (feature_count == 1)
    {
      ++matches_single;
    }
    else
    {
      ++matches_multi;
    }
  }

  // some statistics output
  LOG_INFO << "Unassigned peptides: " << matches_none << "\n"
           << "Peptides assigned to exactly one feature: " << matches_single << "\n"
           << "Peptides assigned to multiple features: " << matches_multi << std::endl;
}
// Tool entry point: links the features of several input maps into one
// consensus map and writes it as consensusXML.
//
// All input files have to be of the same type. featureXML inputs are fed to the
// algorithm one by one (the map with the most features serves as reference);
// any other type is loaded as consensusXML and grouped in one call.
//
// @return ILLEGAL_PARAMETERS if no input file is given or the input types
//         differ, EXECUTION_OK otherwise.
ExitCodes main_(int, const char **)
{
  // Automatic storage: the algorithm is released on every exit path
  // (the former new/delete pair leaked on early returns and exceptions).
  FeatureGroupingAlgorithmUnlabeled algorithm;

  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList ins;
  ins = getStringList_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // check for valid input
  //-------------------------------------------------------------
  // ins[0] below requires at least one entry
  if (ins.empty())
  {
    writeLog_("Error: No input files given!");
    return ILLEGAL_PARAMETERS;
  }

  // check if all input files have the correct type
  FileTypes::Type file_type = FileHandler::getType(ins[0]);
  for (Size i = 0; i < ins.size(); ++i)
  {
    if (FileHandler::getType(ins[i]) != file_type)
    {
      writeLog_("Error: All input files must be of the same type!");
      return ILLEGAL_PARAMETERS;
    }
  }

  //-------------------------------------------------------------
  // set up algorithm
  //-------------------------------------------------------------
  Param algorithm_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used algorithm parameters", algorithm_param, 3);
  algorithm.setParameters(algorithm_param);

  Size reference_index(0);

  //-------------------------------------------------------------
  // perform grouping
  //-------------------------------------------------------------
  // load input
  ConsensusMap out_map;
  StringList ms_run_locations;
  if (file_type == FileTypes::FEATUREXML)
  {
    // use map with highest number of features as reference:
    Size max_count(0);
    FeatureXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      Size s = f.loadSize(ins[i]);
      if (s > max_count)
      {
        max_count = s;
        reference_index = i;
      }
    }

    // Load reference map and input it to the algorithm; only its meta data
    // is kept beyond this scope.
    UInt64 ref_id;
    Size ref_size;
    std::vector<PeptideIdentification> ref_pepids;
    std::vector<ProteinIdentification> ref_protids;
    {
      FeatureMap map_ref;
      FeatureXMLFile f_fxml_tmp;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[reference_index], map_ref);
      algorithm.setReference(reference_index, map_ref);
      ref_id = map_ref.getUniqueId();
      ref_size = map_ref.size();
      ref_pepids = map_ref.getUnassignedPeptideIdentifications();
      ref_protids = map_ref.getProteinIdentifications();
    }

    ConsensusMap dummy;
    // go through all input files and add them to the result one by one
    for (Size i = 0; i < ins.size(); ++i)
    {
      FeatureXMLFile f_fxml_tmp;
      FeatureMap tmp_map;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[i], tmp_map);

      // copy over information on the primary MS run
      StringList ms_runs;
      tmp_map.getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

      if (i != reference_index)
      {
        algorithm.addToGroup(i, tmp_map);

        // store some meta-data about the maps in the "dummy" object -> try to
        // keep the same order as they were given in the input independent of
        // which map is the reference.
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = tmp_map.size();
        dummy.getFileDescriptions()[i].unique_id = tmp_map.getUniqueId();

        // add protein identifications to result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          tmp_map.getProteinIdentifications().begin(),
          tmp_map.getProteinIdentifications().end());

        // add unassigned peptide identifications to result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          tmp_map.getUnassignedPeptideIdentifications().begin(),
          tmp_map.getUnassignedPeptideIdentifications().end());
      }
      else
      {
        // copy the meta-data from the reference map
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = ref_size;
        dummy.getFileDescriptions()[i].unique_id = ref_id;

        // add protein identifications to result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          ref_protids.begin(),
          ref_protids.end());

        // add unassigned peptide identifications to result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          ref_pepids.begin(),
          ref_pepids.end());
      }
    }

    // get the resulting map
    out_map = algorithm.getResultMap();

    //
    // Copy back meta-data (Protein / Peptide ids / File descriptions)
    //

    // add protein identifications to result map
    out_map.getProteinIdentifications().insert(
      out_map.getProteinIdentifications().end(),
      dummy.getProteinIdentifications().begin(),
      dummy.getProteinIdentifications().end());

    // add unassigned peptide identifications to result map
    out_map.getUnassignedPeptideIdentifications().insert(
      out_map.getUnassignedPeptideIdentifications().end(),
      dummy.getUnassignedPeptideIdentifications().begin(),
      dummy.getUnassignedPeptideIdentifications().end());

    out_map.setFileDescriptions(dummy.getFileDescriptions());

    // canonical ordering for checking the results, and the ids have no real meaning anyway
    // the way this was done in DelaunayPairFinder and StablePairFinder
    // -> the same ordering as FeatureGroupingAlgorithmUnlabeled::group applies!
    out_map.sortByMZ();
    out_map.updateRanges();
  }
  else
  {
    vector<ConsensusMap> maps(ins.size());
    ConsensusXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      f.load(ins[i], maps[i]);
      StringList ms_runs;
      maps[i].getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
    }

    // group (explicitly the FeatureGroupingAlgorithm overload for consensus maps)
    algorithm.FeatureGroupingAlgorithm::group(maps, out_map);

    // set file descriptions:
    bool keep_subelements = getFlag_("keep_subelements");
    if (!keep_subelements)
    {
      for (Size i = 0; i < ins.size(); ++i)
      {
        out_map.getFileDescriptions()[i].filename = ins[i];
        out_map.getFileDescriptions()[i].size = maps[i].size();
        out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
      }
    }
    else
    {
      // components of the output map are not the input maps themselves, but
      // the components of the input maps:
      algorithm.transferSubelements(maps, out_map);
    }
  }

  // assign unique ids
  out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // annotate output with data processing info
  addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

  out_map.setPrimaryMSRunPath(ms_run_locations);

  // write output
  ConsensusXMLFile().store(out, out_map);

  // some statistics: number of consensus features per size, largest size first
  map<Size, UInt> num_consfeat_of_size;
  for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit)
  {
    ++num_consfeat_of_size[cmit->size()];
  }

  LOG_INFO << "Number of consensus features:" << endl;
  for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
  {
    LOG_INFO << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
  }
  LOG_INFO << "  total:      " << setw(6) << out_map.size() << endl;

  return EXECUTION_OK;
}
void FeatureGroupingAlgorithmUnlabeled::group(const std::vector<FeatureMap> & maps, ConsensusMap & out) { // check that the number of maps is ok if (maps.size() < 2) { throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "At least two maps must be given!"); } // define reference map (the one with most peaks) Size reference_map_index = 0; Size max_count = 0; for (Size m = 0; m < maps.size(); ++m) { if (maps[m].size() > max_count) { max_count = maps[m].size(); reference_map_index = m; } } std::vector<ConsensusMap> input(2); // build a consensus map of the elements of the reference map (contains only singleton consensus elements) MapConversion::convert(reference_map_index, maps[reference_map_index], input[0]); // loop over all other maps, extend the groups StablePairFinder pair_finder; pair_finder.setParameters(param_.copy("", true)); for (Size i = 0; i < maps.size(); ++i) { if (i != reference_map_index) { MapConversion::convert(i, maps[i], input[1]); // compute the consensus of the reference map and map i ConsensusMap result; pair_finder.run(input, result); input[0].swap(result); } } // replace result with temporary map out.swap(input[0]); // copy back the input maps (they have been deleted while swapping) out.getFileDescriptions() = input[0].getFileDescriptions(); // add protein IDs and unassigned peptide IDs to the result map here, // to keep the same order as the input maps (useful for output later) for (std::vector<FeatureMap>::const_iterator map_it = maps.begin(); map_it != maps.end(); ++map_it) { // add protein identifications to result map out.getProteinIdentifications().insert( out.getProteinIdentifications().end(), map_it->getProteinIdentifications().begin(), map_it->getProteinIdentifications().end()); // add unassigned peptide identifications to result map out.getUnassignedPeptideIdentifications().insert( out.getUnassignedPeptideIdentifications().end(), map_it->getUnassignedPeptideIdentifications().begin(), 
map_it->getUnassignedPeptideIdentifications().end()); } // canonical ordering for checking the results, and the ids have no real meaning anyway #if 1 // the way this was done in DelaunayPairFinder and StablePairFinder out.sortByMZ(); #else out.sortByQuality(); out.sortByMaps(); out.sortBySize(); #endif return; }
// Protein inference for one ProteinIdentification of a consensus map.
//
// For every protein hit, the consensus features carrying a unique peptide hit
// that references the protein (and stems from the same identification run) are
// collected. From these the hit is annotated with the meta values
//   "ratio_<map index>" : median intensity ratio of that map vs. the reference map,
//   "coverage"          : summed peptide sequence length relative to the protein
//                         sequence length, in percent,
//   "hits"              : number of consensus features with a unique peptide.
// Protein hits without any matching peptide stay untouched.
//
// @param consensus_map               Map providing features, peptide IDs and file descriptions.
// @param protein_idenfication_index  Index of the ProteinIdentification to process.
// @param reference_map               Map index whose handle intensity is the ratio denominator.
void ProteinInference::infer_(ConsensusMap & consensus_map, const size_t protein_idenfication_index, const UInt reference_map)
{
  ProteinIdentification & protein_ident = consensus_map.getProteinIdentifications()[protein_idenfication_index];

  for (size_t hit_index = 0; hit_index < protein_ident.getHits().size(); ++hit_index)
  {
    // protein accession
    String accession = protein_ident.getHits()[hit_index].getAccession();

    // consensus feature index -> unique peptide hit for the current protein
    Map<size_t, PeptideHit> consensus_to_peptide;

    // search for the protein in all consensus elements
    for (size_t i_cm = 0; i_cm < consensus_map.size(); ++i_cm)
    {
      std::vector<PeptideIdentification> & pep_ids = consensus_map[i_cm].getPeptideIdentifications();

      // best unique hit of every peptide identification attached to this element
      std::vector<PeptideHit> candidates;
      for (std::vector<PeptideIdentification>::iterator it_pepid = pep_ids.begin(); it_pepid != pep_ids.end(); ++it_pepid)
      {
        // are Protein- and PeptideIdentification from the same search engine run?
        if (it_pepid->getIdentifier() != protein_ident.getIdentifier())
        {
          continue;
        }

        std::vector<PeptideHit> referencing_hits;
        it_pepid->getReferencingHits(accession, referencing_hits);
        if (referencing_hits.empty())
        {
          continue;
        }

        // keep the front element only if sortByUnique_ reports a unique peptide
        if (sortByUnique_(referencing_hits, it_pepid->isHigherScoreBetter()))
        {
          candidates.push_back(referencing_hits[0]);
        }
      }

      if (candidates.empty())
      {
        continue;
      }

      // if several PeptideIdentifications (==Spectra) were assigned to the current ConsensusElement
      // --> take the best (as above), e.g. in SILAC this could happen
      // TODO: better idea?
      if (sortByUnique_(candidates, pep_ids[0].isHigherScoreBetter()))
      {
        consensus_to_peptide[i_cm] = candidates[0];
#ifdef DEBUG_INFERENCE
        std::cout << "assign peptide " <<  candidates[0].getSequence() << " to Protein " << accession << std::endl;
#endif
      }
    } // ConsensusMap loop

    // no peptides found that match the current protein
    if (consensus_to_peptide.empty())
    {
      continue;
    }

    // Use all matching ConsensusElements to derive a quantitation for the current protein:
    // build up ratios for every map vs. reference
    double coverage = 0;
    Map<Size, std::vector<IntensityType> > ratios;

    // number of unique peptides pointing to the current protein
    UInt coverage_count = static_cast<UInt>(consensus_to_peptide.size());

    for (Map<size_t, PeptideHit>::iterator it_pephits = consensus_to_peptide.begin(); it_pephits != consensus_to_peptide.end(); ++it_pephits)
    {
      coverage += it_pephits->second.getSequence().size();

      const ConsensusFeature::HandleSetType & handles = consensus_map[it_pephits->first].getFeatures();

      // search for the handle of the reference map
      ConsensusFeature::HandleSetType::const_iterator it_ref = handles.begin();
      while (it_ref != handles.end() && !(it_ref->getMapIndex() == reference_map))
      {
        ++it_ref;
      }

      // did not find a reference
      // TODO assume intensity==0 instead??
      if (it_ref == handles.end())
      {
        continue;
      }

      // NOTE(review): a reference intensity of zero is not handled here
      for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin(); it != handles.end(); ++it)
      {
        ratios[it->getMapIndex()].push_back(it->getIntensity() / it_ref->getIntensity());
      }
    }

    // sort ratios map-wise and take the median
    for (ConsensusMap::FileDescriptions::const_iterator it_file = consensus_map.getFileDescriptions().begin();
         it_file != consensus_map.getFileDescriptions().end();
         ++it_file)
    {
      if (!ratios.has(it_file->first))
      {
        continue;
      }

      std::vector<IntensityType> & map_ratios = ratios[it_file->first];
      std::sort(map_ratios.begin(), map_ratios.end());

      // element at size/2 of the sorted ratios (upper median for even counts)
      IntensityType protein_ratio = map_ratios[map_ratios.size() / 2];

      //TODO if ratios have high variance emit a warning!

      protein_ident.getHits()[hit_index].setMetaValue(String("ratio_") + String(it_file->first), protein_ratio);
    } // map loop

    // % coverage of protein by peptides
    // NOTE(review): an empty protein sequence makes the divisor zero
    coverage /= DoubleReal(protein_ident.getHits()[hit_index].getSequence().size()) / 100;

    protein_ident.getHits()[hit_index].setMetaValue("coverage", coverage);
    protein_ident.getHits()[hit_index].setMetaValue("hits", coverage_count);
  } // protein loop
}
void LabeledPairFinder::run(const vector<ConsensusMap>& input_maps, ConsensusMap& result_map) { if (input_maps.size() != 1) throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "exactly one input map required"); if (result_map.getFileDescriptions().size() != 2) throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "two file descriptions required"); if (result_map.getFileDescriptions().begin()->second.filename != result_map.getFileDescriptions().rbegin()->second.filename) throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the two file descriptions have to contain the same file name"); checkIds_(input_maps); //look up the light and heavy index Size light_index = numeric_limits<Size>::max(); Size heavy_index = numeric_limits<Size>::max(); for (ConsensusMap::FileDescriptions::const_iterator it = result_map.getFileDescriptions().begin(); it != result_map.getFileDescriptions().end(); ++it) { if (it->second.label == "heavy") { heavy_index = it->first; } else if (it->second.label == "light") { light_index = it->first; } } if (light_index == numeric_limits<Size>::max() || heavy_index == numeric_limits<Size>::max()) { throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "the input maps have to be labeled 'light' and 'heavy'"); } result_map.clear(false); // sort consensus features by RT (and MZ) to speed up searching afterwards typedef ConstRefVector<ConsensusMap> RefMap; RefMap model_ref(input_maps[0].begin(), input_maps[0].end()); model_ref.sortByPosition(); //calculate matches ConsensusMap matches; //settings double rt_pair_dist = param_.getValue("rt_pair_dist"); double rt_dev_low = param_.getValue("rt_dev_low"); double rt_dev_high = param_.getValue("rt_dev_high"); double mz_dev = param_.getValue("mz_dev"); DoubleList mz_pair_dists = param_.getValue("mz_pair_dists"); bool mrm = param_.getValue("mrm").toBool(); //estimate RT parameters if (param_.getValue("rt_estimate") == "true") { //find 
all possible RT distances of features with the same charge and a good m/z distance vector<double> dists; dists.reserve(model_ref.size()); for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it) { for (RefMap::const_iterator it2 = model_ref.begin(); it2 != model_ref.end(); ++it2) { for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it) { double mz_pair_dist = *dist_it; if (it2->getCharge() == it->getCharge() && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev) { dists.push_back(it2->getRT() - it->getRT()); } } } } if (dists.empty()) { cout << "Warning: Could not find pairs for RT distance estimation. The manual settings are used!" << endl; } else { if (dists.size() < 50) { cout << "Warning: Found only " << dists.size() << " pairs. The estimated shift and std deviation are probably not reliable!" << endl; } //--------------------------- estimate initial parameters of fit --------------------------- GaussFitter::GaussFitResult result(-1, -1, -1); //first estimate of the optimal shift: median of the distances sort(dists.begin(), dists.end()); Size median_index = dists.size() / 2; result.x0 = dists[median_index]; //create histogram of distances //consider only the maximum of pairs, centered around the optimal shift Size max_pairs = model_ref.size() / 2; Size start_index = (Size) max((SignedSize)0, (SignedSize)(median_index - max_pairs / 2)); Size end_index = (Size) min((SignedSize)(dists.size() - 1), (SignedSize)(median_index + max_pairs / 2)); double start_value = dists[start_index]; double end_value = dists[end_index]; double bin_step = fabs(end_value - start_value) / 99.999; //ensure that we have 100 bins Math::Histogram<> hist(start_value, end_value, bin_step); //std::cout << "HIST from " << start_value << " to " << end_value << " (bin size " << bin_step << ")" << endl; for (Size i = start_index; 
i <= end_index; ++i) { hist.inc(dists[i]); } //cout << hist << endl; dists.clear(); //determine median of bins (uniform background distribution) vector<Size> bins(hist.begin(), hist.end()); sort(bins.begin(), bins.end()); Size bin_median = bins[bins.size() / 2]; bins.clear(); //estimate scale A: maximum of the histogram Size max_value = hist.maxValue(); result.A = max_value - bin_median; //overwrite estimate of x0 with the position of the highest bin for (Size i = 0; i < hist.size(); ++i) { if (hist[i] == max_value) { result.x0 = hist.centerOfBin(i); break; } } //estimate sigma: first time the count is less or equal the median count in the histogram double pos = result.x0; while (pos > start_value && hist.binValue(pos) > bin_median) { pos -= bin_step; } double sigma_low = result.x0 - pos; pos = result.x0; while (pos<end_value&& hist.binValue(pos)> bin_median) { pos += bin_step; } double sigma_high = pos - result.x0; result.sigma = (sigma_high + sigma_low) / 6.0; //cout << "estimated optimal RT distance (before fit): " << result.x0 << endl; //cout << "estimated allowed deviation (before fit): " << result.sigma*3.0 << endl; //--------------------------- do gauss fit --------------------------- vector<DPosition<2> > points(hist.size()); for (Size i = 0; i < hist.size(); ++i) { points[i][0] = hist.centerOfBin(i); points[i][1] = max(0u, hist[i]); } GaussFitter fitter; fitter.setInitialParameters(result); result = fitter.fit(points); cout << "estimated optimal RT distance: " << result.x0 << endl; cout << "estimated allowed deviation: " << fabs(result.sigma) * 3.0 << endl; rt_pair_dist = result.x0; rt_dev_low = fabs(result.sigma) * 3.0; rt_dev_high = fabs(result.sigma) * 3.0; } } // check each feature for (RefMap::const_iterator it = model_ref.begin(); it != model_ref.end(); ++it) { for (DoubleList::const_iterator dist_it = mz_pair_dists.begin(); dist_it != mz_pair_dists.end(); ++dist_it) { double mz_pair_dist = *dist_it; RefMap::const_iterator it2 = 
lower_bound(model_ref.begin(), model_ref.end(), it->getRT() + rt_pair_dist - rt_dev_low, ConsensusFeature::RTLess()); while (it2 != model_ref.end() && it2->getRT() <= it->getRT() + rt_pair_dist + rt_dev_high) { // if in mrm mode, we need to compare precursor mass difference and fragment mass difference, charge remains the same double prec_mz_diff(0); if (mrm) { prec_mz_diff = fabs((double)it2->getMetaValue("MZ") - (double)it->getMetaValue("MZ")); if (it->getCharge() != 0) { prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist / it->getCharge()); } else { prec_mz_diff = fabs(prec_mz_diff - mz_pair_dist); } } bool mrm_correct_dist(false); double frag_mz_diff = fabs(it->getMZ() - it2->getMZ()); //cerr << it->getRT() << " charge1=" << it->getCharge() << ", charge2=" << it2->getCharge() << ", prec_diff=" << prec_mz_diff << ", frag_diff=" << frag_mz_diff << endl; if (mrm && it2->getCharge() == it->getCharge() && prec_mz_diff < mz_dev && (frag_mz_diff < mz_dev || fabs(frag_mz_diff - mz_pair_dist) < mz_dev)) { mrm_correct_dist = true; //cerr << "mrm_correct_dist" << endl; } if ((mrm && mrm_correct_dist) || (!mrm && it2->getCharge() == it->getCharge() && it2->getMZ() >= it->getMZ() + mz_pair_dist / it->getCharge() - mz_dev && it2->getMZ() <= it->getMZ() + mz_pair_dist / it->getCharge() + mz_dev )) { //cerr << "dist correct" << endl; double score = sqrt( PValue_(it2->getMZ() - it->getMZ(), mz_pair_dist / it->getCharge(), mz_dev, mz_dev) * PValue_(it2->getRT() - it->getRT(), rt_pair_dist, rt_dev_low, rt_dev_high) ); // Note: we used to copy the id from the light feature here, but that strategy does not generalize to more than two labels. // We might want to report consensus features where the light one is missing but more than one heavier variant was found. // Also, the old strategy is inconsistent with what was done in the unlabeled case. Thus now we assign a new unique id here. 
matches.push_back(ConsensusFeature()); matches.back().setUniqueId(); matches.back().insert(light_index, *it); matches.back().clearMetaInfo(); matches.back().insert(heavy_index, *it2); matches.back().setQuality(score); matches.back().setCharge(it->getCharge()); matches.back().computeMonoisotopicConsensus(); } ++it2; } } } //compute best pairs // - sort matches by quality // - take highest-quality matches first (greedy) and mark them as used set<Size> used_features; matches.sortByQuality(true); for (ConsensusMap::const_iterator match = matches.begin(); match != matches.end(); ++match) { //check if features are not used yet if (used_features.find(match->begin()->getUniqueId()) == used_features.end() && used_features.find(match->rbegin()->getUniqueId()) == used_features.end() ) { //if unused, add it to the final set of elements result_map.push_back(*match); used_features.insert(match->begin()->getUniqueId()); used_features.insert(match->rbegin()->getUniqueId()); } } //Add protein identifications to result map for (Size i = 0; i < input_maps.size(); ++i) { result_map.getProteinIdentifications().insert(result_map.getProteinIdentifications().end(), input_maps[i].getProteinIdentifications().begin(), input_maps[i].getProteinIdentifications().end()); } //Add unassigned peptide identifications to result map for (Size i = 0; i < input_maps.size(); ++i) { result_map.getUnassignedPeptideIdentifications().insert(result_map.getUnassignedPeptideIdentifications().end(), input_maps[i].getUnassignedPeptideIdentifications().begin(), input_maps[i].getUnassignedPeptideIdentifications().end()); } // Very useful for checking the results, and the ids have no real meaning anyway result_map.sortByMZ(); }
void IBSpectraFile::store(const String& filename, const ConsensusMap& cm) { // typdefs for shorter code typedef std::vector<ProteinHit>::iterator ProtHitIt; // general settings .. do we need to expose these? // ---------------------------------------------------------------------- /// Allow also non-unique peptides to be exported bool allow_non_unique = true; /// Intensities below this value will be set to 0.0 to avoid numerical problems when quantifying double intensity_threshold = 0.00001; // ---------------------------------------------------------------------- // guess experiment type boost::shared_ptr<IsobaricQuantitationMethod> quantMethod = guessExperimentType_(cm); // we need the protein identifications to reference the protein names ProteinIdentification protIdent; bool has_proteinIdentifications = false; if (cm.getProteinIdentifications().size() > 0) { protIdent = cm.getProteinIdentifications()[0]; has_proteinIdentifications = true; } // start the file by adding the tsv header TextFile textFile; textFile.addLine(ListUtils::concatenate(constructHeader_(*quantMethod), "\t")); for (ConsensusMap::ConstIterator cm_iter = cm.begin(); cm_iter != cm.end(); ++cm_iter) { const ConsensusFeature& cFeature = *cm_iter; std::vector<IdCSV> entries; /// 1st we extract the identification information from the consensus feature if (cFeature.getPeptideIdentifications().size() == 0 || !has_proteinIdentifications) { // we store unidentified hits anyway, because the iTRAQ quant is still helpful for normalization entries.push_back(IdCSV()); } else { // protein name: const PeptideHit& peptide_hit = cFeature.getPeptideIdentifications()[0].getHits()[0]; std::set<String> protein_accessions = peptide_hit.extractProteinAccessions(); if (protein_accessions.size() != 1) { if (!allow_non_unique) continue; // we only want unique peptides } for (std::set<String>::const_iterator prot_ac = protein_accessions.begin(); prot_ac != protein_accessions.end(); ++prot_ac) { IdCSV entry; entry.charge 
= cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge(); entry.peptide = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString(); entry.theo_mass = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge()); // write modif entry.modif = getModifString_(cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence()); ProtHitIt proteinHit = protIdent.findHit(*prot_ac); if (proteinHit == protIdent.getHits().end()) { std::cerr << "Protein referenced in peptide not found...\n"; continue; // protein not found } entry.accession = proteinHit->getAccession(); entries.push_back(entry); } } // 2nd we add the quantitative information of the channels // .. skip features with 0 intensity if (cFeature.getIntensity() == 0) { continue; } for (std::vector<IdCSV>::iterator entry = entries.begin(); entry != entries.end(); ++entry) { // set parent intensity entry->parent_intens = cFeature.getIntensity(); entry->retention_time = cFeature.getRT(); entry->spectrum = cFeature.getUniqueId(); entry->exp_mass = cFeature.getMZ(); // create output line StringList currentLine; // add entry to currentLine entry->toStringList(currentLine); // extract channel intensities and positions std::map<Int, double> intensityMap; ConsensusFeature::HandleSetType features = cFeature.getFeatures(); for (ConsensusFeature::HandleSetType::const_iterator fIt = features.begin(); fIt != features.end(); ++fIt) { intensityMap[Int(fIt->getMZ())] = (fIt->getIntensity() > intensity_threshold ? 
fIt->getIntensity() : 0.0); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(it->center)); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(intensityMap[int(it->center)])); } textFile.addLine(ListUtils::concatenate(currentLine, "\t")); } } // write to file textFile.store(filename); }
// Tool entry point: loads the input file ("in"), moves all protein and peptide
// identifications out of it, optionally stores the stripped data ("out") and
// optionally stores the collected identifications as idXML ("id_out").
// At least one of "out" and "id_out" must be given, otherwise
// Exception::RequiredParameterNotGiven is thrown.
// Returns EXECUTION_OK on success.
ExitCodes main_(int, const char **)
{
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  String id_out = getStringOption_("id_out");

  const bool write_data = !out.empty();
  const bool write_ids = !id_out.empty();
  if (!write_data && !write_ids)
  {
    throw Exception::RequiredParameterNotGiven(__FILE__, __LINE__, __PRETTY_FUNCTION__, "out/id_out");
  }

  // identifications collected from the input, in the order they are found
  vector<ProteinIdentification> proteins;
  vector<PeptideIdentification> peptides;

  FileTypes::Type in_type = FileHandler::getType(in);

  if (in_type == FileTypes::MZML)
  {
    MSExperiment<> peak_map;
    MzMLFile().load(in, peak_map);

    // what about unassigned peptide IDs?
    for (MSExperiment<>::Iterator spectrum = peak_map.begin(); spectrum != peak_map.end(); ++spectrum)
    {
      vector<PeptideIdentification>& spectrum_ids = spectrum->getPeptideIdentifications();
      peptides.insert(peptides.end(), spectrum_ids.begin(), spectrum_ids.end());
      spectrum_ids.clear();
    }
    proteins.swap(peak_map.getProteinIdentifications());

    if (write_data)
    {
      addDataProcessing_(peak_map, getProcessingInfo_(DataProcessing::FILTERING));
      MzMLFile().store(out, peak_map);
    }
  }
  else if (in_type == FileTypes::FEATUREXML)
  {
    FeatureMap feature_map;
    FeatureXMLFile().load(in, feature_map);

    // unassigned IDs come first, followed by the IDs attached to the features
    peptides.swap(feature_map.getUnassignedPeptideIdentifications());
    for (FeatureMap::Iterator feature = feature_map.begin(); feature != feature_map.end(); ++feature)
    {
      vector<PeptideIdentification>& feature_ids = feature->getPeptideIdentifications();
      peptides.insert(peptides.end(), feature_ids.begin(), feature_ids.end());
      feature_ids.clear();
    }
    proteins.swap(feature_map.getProteinIdentifications());

    if (write_data)
    {
      addDataProcessing_(feature_map, getProcessingInfo_(DataProcessing::FILTERING));
      FeatureXMLFile().store(out, feature_map);
    }
  }
  else // consensusXML
  {
    ConsensusMap consensus_map;
    ConsensusXMLFile().load(in, consensus_map);

    // unassigned IDs come first, followed by the IDs attached to the consensus features
    peptides.swap(consensus_map.getUnassignedPeptideIdentifications());
    for (ConsensusMap::Iterator consensus = consensus_map.begin(); consensus != consensus_map.end(); ++consensus)
    {
      vector<PeptideIdentification>& consensus_ids = consensus->getPeptideIdentifications();
      peptides.insert(peptides.end(), consensus_ids.begin(), consensus_ids.end());
      consensus_ids.clear();
    }
    proteins.swap(consensus_map.getProteinIdentifications());

    if (write_data)
    {
      addDataProcessing_(consensus_map, getProcessingInfo_(DataProcessing::FILTERING));
      ConsensusXMLFile().store(out, consensus_map);
    }
  }

  if (write_ids)
  {
    // IDMapper can match a peptide ID to several overlapping features,
    // resulting in duplicates; this shouldn't be the case for peak data
    if (in_type != FileTypes::MZML)
    {
      removeDuplicates_(peptides);
    }
    IdXMLFile().store(id_out, proteins, peptides);
  }

  return EXECUTION_OK;
}
TEST_EQUAL(map.getFileDescriptions()[1].getMetaValue("name5") == DataValue("value5"), true) TEST_EQUAL(map.getFileDescriptions()[1].getMetaValue("name6") == DataValue(6.0), true) //data processing TEST_EQUAL(map.getDataProcessing().size(), 2) TEST_STRING_EQUAL(map.getDataProcessing()[0].getSoftware().getName(), "Software1") TEST_STRING_EQUAL(map.getDataProcessing()[0].getSoftware().getVersion(), "0.91a") TEST_EQUAL(map.getDataProcessing()[0].getProcessingActions().size(), 1) TEST_EQUAL(map.getDataProcessing()[0].getProcessingActions().count(DataProcessing::DEISOTOPING), 1) TEST_STRING_EQUAL(map.getDataProcessing()[0].getMetaValue("name"), "dataProcessing") TEST_STRING_EQUAL(map.getDataProcessing()[1].getSoftware().getName(), "Software2") TEST_STRING_EQUAL(map.getDataProcessing()[1].getSoftware().getVersion(), "0.92a") TEST_EQUAL(map.getDataProcessing()[1].getProcessingActions().size(), 2) TEST_EQUAL(map.getDataProcessing()[1].getProcessingActions().count(DataProcessing::SMOOTHING), 1) TEST_EQUAL(map.getDataProcessing()[1].getProcessingActions().count(DataProcessing::BASELINE_REDUCTION), 1) //protein identifications TEST_EQUAL(map.getProteinIdentifications().size(), 2) TEST_EQUAL(map.getProteinIdentifications()[0].getHits().size(), 2) TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[0].getSequence(), "ABCDEFG") TEST_EQUAL(map.getProteinIdentifications()[0].getHits()[1].getSequence(), "HIJKLMN") TEST_EQUAL(map.getProteinIdentifications()[1].getHits().size(), 1) TEST_EQUAL(map.getProteinIdentifications()[1].getHits()[0].getSequence(), "OPQREST") //peptide identifications TEST_EQUAL(map[0].getPeptideIdentifications().size(), 2) TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits().size(), 1) TEST_EQUAL(map[0].getPeptideIdentifications()[0].getHits()[0].getSequence(), "A") TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits().size(), 2) TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[0].getSequence(), "C") 
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[1].getSequence(), "D") TEST_EQUAL(map[1].getPeptideIdentifications().size(), 1) TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits().size(), 1) TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits()[0].getSequence(), "E")