/**
  @brief Checks this consensus map for internal consistency.

  Two conditions are verified, in this order:
  - the textual (filename, label) descriptions of all file descriptions are pairwise distinct
  - every feature handle of every consensus feature refers to a map index
    for which a file description exists

  @param stream Optional log stream. If non-null, a description of the first
                violated condition is written to it.
  @return true if both conditions hold, false as soon as one is violated.
*/
bool ConsensusMap::isMapConsistent(Logger::LogStream* stream) const
{
  const bool report = (stream != 0);

  // --- 1) file descriptions must be unique ------------------------------
  std::set<String> distinct_descriptions;
  String description_listing; // concatenation of all descriptions, only used for the log message
  FileDescriptions::ConstIterator fd = file_description_.begin();
  while (fd != file_description_.end())
  {
    const String text = String(" file: ") + fd->second.filename + " label: " + fd->second.label;
    distinct_descriptions.insert(text);
    description_listing += text;
    ++fd;
  }

  // a set collapses duplicates, so a smaller set means at least two descriptions were identical
  if (distinct_descriptions.size() != file_description_.size())
  {
    if (report)
    {
      *stream << "ConsensusMap file descriptions are not unique:\n" << description_listing << std::endl;
    }
    return false;
  }

  // --- 2) every handle must point to a described map --------------------
  Size invalid_reference_total(0);      // number of handles with an unknown map index
  Map<Size, Size> invalid_index_tally;  // unknown map index -> how often it was referenced
  for (Size idx = 0; idx < size(); ++idx)
  {
    const ConsensusFeature& consensus = (*this)[idx];
    ConsensusFeature::HandleSetType::const_iterator handle = consensus.begin();
    for (; handle != consensus.end(); ++handle)
    {
      if (file_description_.has(handle->getMapIndex()))
      {
        continue;
      }
      ++invalid_reference_total;
      ++invalid_index_tally[handle->getMapIndex()];
    }
  }

  if (invalid_reference_total == 0)
  {
    return true;
  }

  if (report)
  {
    *stream << "ConsensusMap contains " << invalid_reference_total << " invalid references to maps:\n";
    for (Map<Size, Size>::ConstIterator tally = invalid_index_tally.begin(); tally != invalid_index_tally.end(); ++tally)
    {
      *stream << " wrong id=" << tally->first << " (occurred " << tally->second << "x)\n";
    }
    *stream << std::endl;
  }
  return false;
}
/**
  @brief Applies a retention time transformation to a consensus feature and
         to all feature handles grouped in it.

  @param feature           Consensus feature to modify in place.
  @param trafo             Transformation whose apply() maps an RT to the new RT.
  @param store_original_rt Forwarded unchanged to applyToBaseFeature_(); it is
                           not used for the grouped handles.
*/
void MapAlignmentTransformer::applyToConsensusFeature_(
  ConsensusFeature& feature, const TransformationDescription& trafo,
  bool store_original_rt)
{
  // the consensus feature itself is handled by the shared base-feature routine
  applyToBaseFeature_(feature, trafo, store_original_rt);

  // the grouped handles are only reachable through const iterators,
  // hence the detour via asMutable() to overwrite their RT
  const ConsensusFeature::HandleSetType& handles = feature.getFeatures();
  ConsensusFeature::HandleSetType::const_iterator handle = handles.begin();
  while (handle != handles.end())
  {
    const double transformed_rt = trafo.apply(handle->getRT());
    handle->asMutable().setRT(transformed_rt);
    ++handle;
  }
}
void ConsensusMap::updateRanges() { clearRanges(); updateRanges_(begin(), end()); // enlarge the range by the internal points of each feature for (Size i = 0; i < size(); ++i) { for (ConsensusFeature::HandleSetType::const_iterator it = operator[](i).begin(); it != operator[](i).end(); ++it) { DoubleReal rt = it->getRT(); DoubleReal mz = it->getMZ(); DoubleReal intensity = it->getIntensity(); // update RT if (rt < pos_range_.minPosition()[Peak2D::RT]) { pos_range_.setMinX(rt); } if (rt > pos_range_.maxPosition()[Peak2D::RT]) { pos_range_.setMaxX(rt); } // update m/z if (mz < pos_range_.minPosition()[Peak2D::MZ]) { pos_range_.setMinY(mz); } if (mz > pos_range_.maxPosition()[Peak2D::MZ]) { pos_range_.setMaxY(mz); } // update intensity if (intensity < int_range_.minX()) { int_range_.setMinX(intensity); } if (intensity > int_range_.maxX()) { int_range_.setMaxX(intensity); } } } }
void EDTAFile::store(const String& filename, const ConsensusMap& map) const { TextFile tf; // search for maximum number of sub-features (since this determines the number of columns) Size max_sub(0); for (Size i = 0; i < map.size(); ++i) { max_sub = std::max(max_sub, map[i].getFeatures().size()); } // write header String header("RT\tm/z\tintensity\tcharge"); for (Size i = 1; i <= max_sub; ++i) { header += "\tRT" + String(i) + "\tm/z" + String(i) + "\tintensity" + String(i) + "\tcharge" + String(i); } tf.addLine(header); for (Size i = 0; i < map.size(); ++i) { ConsensusFeature f = map[i]; // consensus String entry = String(f.getRT()) + "\t" + f.getMZ() + "\t" + f.getIntensity() + "\t" + f.getCharge(); // sub-features ConsensusFeature::HandleSetType handle = f.getFeatures(); for (ConsensusFeature::HandleSetType::const_iterator it = handle.begin(); it != handle.end(); ++it) { entry += String("\t") + it->getRT() + "\t" + it->getMZ() + "\t" + it->getIntensity() + "\t" + it->getCharge(); } // missing sub-features for (Size j = handle.size(); j < max_sub; ++j) { entry += "\tNA\tNA\tNA\tNA"; } tf.addLine(entry); } tf.store(filename); }
TEST_EQUAL(feature_maps[0][3].getIntensity(), 120) TEST_EQUAL(feature_maps[0][3].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "CNHAADDAAAAA") TEST_EQUAL(feature_maps[0][4].getIntensity(), 250) TEST_EQUAL(feature_maps[0][4].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "HHHHHHHHHHH") TEST_EQUAL(feature_maps[0][5].getIntensity(), 100) TEST_EQUAL(feature_maps[0][5].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "LDCELR") // Test ConsensusMap association ConsensusMap cm = labeler.getConsensus(); TEST_EQUAL(cm.size(), 1) ABORT_IF(cm.size() != 1) TEST_EQUAL(cm[0].getFeatures().size(),2) ConsensusFeature::HandleSetType::const_iterator fhIt = cm[0].getFeatures().begin(); TEST_EQUAL(feature_maps[0][1].getUniqueId(), fhIt->getUniqueId()) ++fhIt; TEST_EQUAL(feature_maps[0][0].getUniqueId(), fhIt->getUniqueId()) // now test the incomplete variant createTestFeatureMapSimVector_(feature_maps); digestFeaturesMapSimVector_(feature_maps); O18Labeler incomplete_labeler; Param p; p.setValue("labeling_efficiency", 0.7); incomplete_labeler.setParameters(p); incomplete_labeler.postDigestHook(feature_maps);
/**
  @brief Derives per-protein quantitation values from the peptide hits
         annotated to the consensus features.

  For every protein hit of the selected ProteinIdentification:
  - at most one peptide hit is selected per consensus feature (via sortByUnique_())
  - for each selected consensus feature that contains a handle from
    @p reference_map, the ratio "handle intensity / reference intensity" is
    collected for every handle, keyed by the handle's map index
  - per map, the element at index size/2 of the sorted ratios is stored as
    meta value "ratio_<map index>" on the protein hit
  - meta values "coverage" and "hits" are stored on the protein hit

  Protein hits without any selected peptide are left untouched.

  @param consensus_map               Map providing features, identifications and file descriptions.
  @param protein_idenfication_index  Index into consensus_map.getProteinIdentifications().
  @param reference_map               Map index whose handle intensity is the ratio denominator.
*/
void ProteinInference::infer_(ConsensusMap & consensus_map,
                              const size_t protein_idenfication_index,
                              const UInt reference_map)
{
  ProteinIdentification & protein_ident = consensus_map.getProteinIdentifications()[protein_idenfication_index];

  for (size_t i = 0; i < protein_ident.getHits().size(); ++i)
  {
    String accession = protein_ident.getHits()[i].getAccession();

    // index of a consensus feature -> peptide hit selected for the current protein
    Map<size_t, PeptideHit> consensus_to_peptide;

    for (size_t i_cm = 0; i_cm < consensus_map.size(); ++i_cm)
    {
      std::vector<PeptideIdentification> & pep_ids = consensus_map[i_cm].getPeptideIdentifications();

      // one candidate per PeptideIdentification that passes the checks below
      std::vector<PeptideHit> candidates;
      for (std::vector<PeptideIdentification>::iterator pep_id = pep_ids.begin(); pep_id != pep_ids.end(); ++pep_id)
      {
        // only identifications belonging to the same run as the protein identification count
        if (pep_id->getIdentifier() != protein_ident.getIdentifier())
        {
          continue;
        }

        std::vector<PeptideHit> referencing;
        pep_id->getReferencingHits(accession, referencing);
        if (referencing.empty())
        {
          continue;
        }

        // keep the front hit only if sortByUnique_() accepts the list
        // (presumably: a unique best hit exists after sorting - see sortByUnique_())
        if (sortByUnique_(referencing, pep_id->isHigherScoreBetter()))
        {
          candidates.push_back(referencing[0]);
        }
      }

      // several identifications (== spectra) may be attached to one consensus
      // feature (e.g. SILAC); reduce them to one hit the same way as above
      if (candidates.empty())
      {
        continue;
      }
      if (!sortByUnique_(candidates, pep_ids[0].isHigherScoreBetter()))
      {
        continue;
      }

      consensus_to_peptide[i_cm] = candidates[0];
#ifdef DEBUG_INFERENCE
      std::cout << "assign peptide " << candidates[0].getSequence() << " to Protein " << accession << std::endl;
#endif
    } // end of consensus feature loop

    // nothing was assigned to this protein
    if (consensus_to_peptide.empty())
    {
      continue;
    }

    double coverage = 0; // summed length of the selected peptide sequences
    Map<Size, std::vector<IntensityType> > ratios; // map index -> intensity ratios vs. reference
    UInt coverage_count = static_cast<UInt>(consensus_to_peptide.size()); // number of selected peptides

    for (Map<size_t, PeptideHit>::iterator assigned = consensus_to_peptide.begin();
         assigned != consensus_to_peptide.end();
         ++assigned)
    {
      coverage += assigned->second.getSequence().size();

      const ConsensusFeature::HandleSetType & handles = consensus_map[assigned->first].getFeatures();

      // locate the handle stemming from the reference map (end() if there is none)
      ConsensusFeature::HandleSetType::const_iterator ref_handle = handles.begin();
      while (ref_handle != handles.end() && ref_handle->getMapIndex() != reference_map)
      {
        ++ref_handle;
      }
      if (ref_handle == handles.end())
      {
        continue; // no reference -> no ratios from this consensus feature
      }

      // NOTE(review): a reference intensity of 0 is not guarded against here
      for (ConsensusFeature::HandleSetType::const_iterator handle = handles.begin(); handle != handles.end(); ++handle)
      {
        ratios[handle->getMapIndex()].push_back(handle->getIntensity() / ref_handle->getIntensity());
      }
    }

    // per described map: sort its ratios and pick the middle element
    const ConsensusMap::FileDescriptions & file_descriptions = consensus_map.getFileDescriptions();
    for (ConsensusMap::FileDescriptions::const_iterator file = file_descriptions.begin();
         file != file_descriptions.end();
         ++file)
    {
      if (!ratios.has(file->first))
      {
        continue;
      }

      std::vector<IntensityType> & map_ratios = ratios[file->first];
      std::sort(map_ratios.begin(), map_ratios.end());
      // for an even number of ratios this is the upper of the two middle elements
      const IntensityType protein_ratio = map_ratios[map_ratios.size() / 2];
      protein_ident.getHits()[i].setMetaValue(String("ratio_") + String(file->first), protein_ratio);
    } // end of map loop

    // summed peptide length relative to the protein sequence length, in percent
    // NOTE(review): divides by zero if the protein hit carries no sequence
    coverage /= DoubleReal(protein_ident.getHits()[i].getSequence().size()) / 100;
    protein_ident.getHits()[i].setMetaValue("coverage", coverage);
    protein_ident.getHits()[i].setMetaValue("hits", coverage_count);
  } // end of protein loop
}
fga.setParameters(p); //test exception (no file name set in out) TEST_EXCEPTION(Exception::IllegalArgument, fga.group(in,out)); out.getColumnHeaders()[5].label = "light"; out.getColumnHeaders()[5].filename = "filename"; out.getColumnHeaders()[8] = out.getColumnHeaders()[5]; out.getColumnHeaders()[8].label = "heavy"; fga.group(in,out); TEST_EQUAL(out.size(),1) TEST_REAL_SIMILAR(out[0].getQuality(),0.959346); TEST_EQUAL(out[0].size(),2) ConsensusFeature::HandleSetType::const_iterator it = out[0].begin(); TEST_REAL_SIMILAR(it->getMZ(),1.0f); TEST_REAL_SIMILAR(it->getRT(),1.0f); ++it; TEST_REAL_SIMILAR(it->getMZ(),5.0f); TEST_REAL_SIMILAR(it->getRT(),1.5f); END_SECTION ///////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////// END_TEST
void IBSpectraFile::store(const String& filename, const ConsensusMap& cm) { // typdefs for shorter code typedef std::vector<ProteinHit>::iterator ProtHitIt; // general settings .. do we need to expose these? // ---------------------------------------------------------------------- /// Allow also non-unique peptides to be exported bool allow_non_unique = true; /// Intensities below this value will be set to 0.0 to avoid numerical problems when quantifying double intensity_threshold = 0.00001; // ---------------------------------------------------------------------- // guess experiment type boost::shared_ptr<IsobaricQuantitationMethod> quantMethod = guessExperimentType_(cm); // we need the protein identifications to reference the protein names ProteinIdentification protIdent; bool has_proteinIdentifications = false; if (cm.getProteinIdentifications().size() > 0) { protIdent = cm.getProteinIdentifications()[0]; has_proteinIdentifications = true; } // start the file by adding the tsv header TextFile textFile; textFile.addLine(ListUtils::concatenate(constructHeader_(*quantMethod), "\t")); for (ConsensusMap::ConstIterator cm_iter = cm.begin(); cm_iter != cm.end(); ++cm_iter) { const ConsensusFeature& cFeature = *cm_iter; std::vector<IdCSV> entries; /// 1st we extract the identification information from the consensus feature if (cFeature.getPeptideIdentifications().size() == 0 || !has_proteinIdentifications) { // we store unidentified hits anyway, because the iTRAQ quant is still helpful for normalization entries.push_back(IdCSV()); } else { // protein name: const PeptideHit& peptide_hit = cFeature.getPeptideIdentifications()[0].getHits()[0]; std::set<String> protein_accessions = peptide_hit.extractProteinAccessions(); if (protein_accessions.size() != 1) { if (!allow_non_unique) continue; // we only want unique peptides } for (std::set<String>::const_iterator prot_ac = protein_accessions.begin(); prot_ac != protein_accessions.end(); ++prot_ac) { IdCSV entry; entry.charge 
= cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge(); entry.peptide = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().toUnmodifiedString(); entry.theo_mass = cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, cFeature.getPeptideIdentifications()[0].getHits()[0].getCharge()); // write modif entry.modif = getModifString_(cFeature.getPeptideIdentifications()[0].getHits()[0].getSequence()); ProtHitIt proteinHit = protIdent.findHit(*prot_ac); if (proteinHit == protIdent.getHits().end()) { std::cerr << "Protein referenced in peptide not found...\n"; continue; // protein not found } entry.accession = proteinHit->getAccession(); entries.push_back(entry); } } // 2nd we add the quantitative information of the channels // .. skip features with 0 intensity if (cFeature.getIntensity() == 0) { continue; } for (std::vector<IdCSV>::iterator entry = entries.begin(); entry != entries.end(); ++entry) { // set parent intensity entry->parent_intens = cFeature.getIntensity(); entry->retention_time = cFeature.getRT(); entry->spectrum = cFeature.getUniqueId(); entry->exp_mass = cFeature.getMZ(); // create output line StringList currentLine; // add entry to currentLine entry->toStringList(currentLine); // extract channel intensities and positions std::map<Int, double> intensityMap; ConsensusFeature::HandleSetType features = cFeature.getFeatures(); for (ConsensusFeature::HandleSetType::const_iterator fIt = features.begin(); fIt != features.end(); ++fIt) { intensityMap[Int(fIt->getMZ())] = (fIt->getIntensity() > intensity_threshold ? 
fIt->getIntensity() : 0.0); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(it->center)); } for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin(); it != quantMethod->getChannelInformation().end(); ++it) { currentLine.push_back(String(intensityMap[int(it->center)])); } textFile.addLine(ListUtils::concatenate(currentLine, "\t")); } } // write to file textFile.store(filename); }
//features TEST_EQUAL(map.size(), 6) ConsensusFeature cons_feature = map[0]; TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27) TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47) TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07) TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27) TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27) TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47) TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[1], 904.47) TEST_REAL_SIMILAR(cons_feature.getIntensityRange().minPosition()[0], 3.12539e+07) TEST_REAL_SIMILAR(cons_feature.getIntensityRange().maxPosition()[0], 3.12539e+07) TEST_REAL_SIMILAR(cons_feature.getQuality(), 1.1) TEST_EQUAL(cons_feature.getMetaValue("peptide_id") == DataValue("RefSeq:NC_1234"), true) ConsensusFeature::HandleSetType::const_iterator it = cons_feature.begin(); TEST_REAL_SIMILAR(it->getIntensity(), 3.12539e+07) cons_feature = map[5]; TEST_REAL_SIMILAR(cons_feature.getRT(), 1194.82) TEST_REAL_SIMILAR(cons_feature.getMZ(), 777.101) TEST_REAL_SIMILAR(cons_feature.getIntensity(), 1.78215e+07) TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1194.82) TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1194.82) TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 777.101) TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[1], 777.101) TEST_REAL_SIMILAR(cons_feature.getIntensityRange().minPosition()[0], 1.78215e+07) TEST_REAL_SIMILAR(cons_feature.getIntensityRange().maxPosition()[0], 1.78215e+07) TEST_REAL_SIMILAR(cons_feature.getQuality(), 0.0) it = cons_feature.begin(); TEST_REAL_SIMILAR(it->getIntensity(), 1.78215e+07)