Ejemplo n.º 1
0
  // Checks whether the file descriptions and the feature handles of this map
  // are consistent with each other.
  //
  // The check fails if
  //  - two file descriptions share the same filename/label combination, or
  //  - a feature handle refers to a map index that has no file description.
  //
  // @param stream Optional log stream (may be 0). If given, the reason for a
  //               failed check is written to it.
  // @return true if both checks pass; false as soon as one of them fails
  //         (the map ID check is not run if the descriptions are not unique).
  bool ConsensusMap::isMapConsistent(Logger::LogStream* stream) const
  {
    Size stats_wrongMID(0); // invalid map ID references by a feature handle
    Map<Size,Size> wrong_ID_count; // which IDs were given which are not valid

    // check file descriptions
    std::set<String> maps;
    String all_maps; // for output later
    for (FileDescriptions::ConstIterator it=file_description_.begin();  it!=file_description_.end(); ++it)
    {
      String s = String("  file: ") + it->second.filename + " label: " + it->second.label;
      maps.insert(s);
      // one description per line; without a separator the report is a single run-on string
      if (!all_maps.empty())
      {
        all_maps += "\n";
      }
      all_maps += s;
    }

    // the set drops duplicates, so a smaller size means at least two descriptions collide
    if (maps.size() != file_description_.size())
    {
      if (stream != 0)
      {
        *stream << "ConsensusMap file descriptions are not unique:\n" << all_maps << std::endl;
      }
      return false;
    }


    // check map IDs
    for (Size i = 0; i < size(); ++i)
    {
      const ConsensusFeature& elem = (*this)[i];
      for (ConsensusFeature::HandleSetType::const_iterator it = elem.begin(); it != elem.end(); ++it)
      {
        if (!file_description_.has(it->getMapIndex()))
        {
          ++stats_wrongMID;
          ++wrong_ID_count[it->getMapIndex()];
        }
      }
    }

    if (stats_wrongMID > 0)
    {
      if (stream != 0)
      {
        *stream << "ConsensusMap contains " << stats_wrongMID << " invalid references to maps:\n";
        // per-ID breakdown of the invalid references
        for (Map<Size,Size>::ConstIterator it=wrong_ID_count.begin(); it!=wrong_ID_count.end(); ++it)
        {
           *stream << "  wrong id="<< it->first << " (occurred " << it->second << "x)\n";
        }
        *stream << std::endl;
      }
      return false;
    }

    return true;
  }
Ejemplo n.º 2
0
  // Applies a retention time transformation to a consensus feature: first to
  // the feature itself (via applyToBaseFeature_), then to the RT of every
  // grouped feature handle. 'store_original_rt' is only forwarded to
  // applyToBaseFeature_.
  void MapAlignmentTransformer::applyToConsensusFeature_(
    ConsensusFeature& feature, const TransformationDescription& trafo,
    bool store_original_rt)
  {
    applyToBaseFeature_(feature, trafo, store_original_rt);

    // the handles are walked with const iterators, so asMutable() is needed
    // to write the transformed RT back
    typedef ConsensusFeature::HandleSetType::const_iterator HandleIt;
    HandleIt handle = feature.getFeatures().begin();
    while (handle != feature.getFeatures().end())
    {
      const double transformed_rt = trafo.apply(handle->getRT());
      handle->asMutable().setRT(transformed_rt);
      ++handle;
    }
  }
Ejemplo n.º 3
0
  void ConsensusMap::updateRanges()
  {
    clearRanges();
    updateRanges_(begin(), end());

    // enlarge the range by the internal points of each feature
    for (Size i = 0; i < size(); ++i)
    {
      for (ConsensusFeature::HandleSetType::const_iterator it = operator[](i).begin(); it != operator[](i).end(); ++it)
      {
        DoubleReal rt = it->getRT();
        DoubleReal mz = it->getMZ();
        DoubleReal intensity = it->getIntensity();

        // update RT
        if (rt < pos_range_.minPosition()[Peak2D::RT])
        {
          pos_range_.setMinX(rt);
        }
        if (rt > pos_range_.maxPosition()[Peak2D::RT])
        {
          pos_range_.setMaxX(rt);
        }
        // update m/z
        if (mz < pos_range_.minPosition()[Peak2D::MZ])
        {
          pos_range_.setMinY(mz);
        }
        if (mz > pos_range_.maxPosition()[Peak2D::MZ])
        {
          pos_range_.setMaxY(mz);
        }
        // update intensity
        if (intensity <  int_range_.minX())
        {
          int_range_.setMinX(intensity);
        }
        if (intensity > int_range_.maxX())
        {
          int_range_.setMaxX(intensity);
        }
      }
    }
  }
Ejemplo n.º 4
0
  void EDTAFile::store(const String& filename, const ConsensusMap& map) const
  {
    TextFile tf;

    // search for maximum number of sub-features (since this determines the number of columns)
    Size max_sub(0);
    for (Size i = 0; i < map.size(); ++i)
    {
      max_sub = std::max(max_sub, map[i].getFeatures().size());
    }

    // write header
    String header("RT\tm/z\tintensity\tcharge");
    for (Size i = 1; i <= max_sub; ++i)
    {
      header += "\tRT" + String(i) + "\tm/z" + String(i) + "\tintensity" + String(i) + "\tcharge" + String(i);
    }
    tf.addLine(header);

    for (Size i = 0; i < map.size(); ++i)
    {
      ConsensusFeature f = map[i];
      // consensus
      String entry = String(f.getRT()) + "\t" + f.getMZ() + "\t" + f.getIntensity() + "\t" + f.getCharge();
      // sub-features
      ConsensusFeature::HandleSetType handle = f.getFeatures();
      for (ConsensusFeature::HandleSetType::const_iterator it = handle.begin(); it != handle.end(); ++it)
      {
        entry += String("\t") + it->getRT() + "\t" + it->getMZ() + "\t" + it->getIntensity() + "\t" + it->getCharge();
      }
      // missing sub-features
      for (Size j = handle.size(); j < max_sub; ++j)
      {
        entry += "\tNA\tNA\tNA\tNA";
      }
      tf.addLine(entry);
    }


    tf.store(filename);
  }
Ejemplo n.º 5
0
  // features 3-5 of the first map: expected intensity and the sequence of the
  // first hit of the first peptide identification
  TEST_EQUAL(feature_maps[0][3].getIntensity(), 120)
  TEST_EQUAL(feature_maps[0][3].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "CNHAADDAAAAA")

  TEST_EQUAL(feature_maps[0][4].getIntensity(), 250)
  TEST_EQUAL(feature_maps[0][4].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "HHHHHHHHHHH")

  TEST_EQUAL(feature_maps[0][5].getIntensity(), 100)
  TEST_EQUAL(feature_maps[0][5].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "LDCELR")

  // Test ConsensusMap association
  ConsensusMap cm = labeler.getConsensus();
  TEST_EQUAL(cm.size(), 1)
  // cm[0] is accessed below, so a map of any other size must not get that far
  ABORT_IF(cm.size() != 1)
  TEST_EQUAL(cm[0].getFeatures().size(),2)

  // the two handles are expected to reference feature 1 first and feature 0 second
  ConsensusFeature::HandleSetType::const_iterator fhIt = cm[0].getFeatures().begin();
  TEST_EQUAL(feature_maps[0][1].getUniqueId(), fhIt->getUniqueId())
  ++fhIt;
  TEST_EQUAL(feature_maps[0][0].getUniqueId(), fhIt->getUniqueId())

  // now test the incomplete variant
  // (feature_maps is rebuilt and digested again before the second labeler runs)
  createTestFeatureMapSimVector_(feature_maps);
  digestFeaturesMapSimVector_(feature_maps);

  // second labeler, configured with a labeling efficiency of 0.7
  O18Labeler incomplete_labeler;
  Param p;
  p.setValue("labeling_efficiency", 0.7);
  incomplete_labeler.setParameters(p);

  incomplete_labeler.postDigestHook(feature_maps);
Ejemplo n.º 6
0
  // Annotates the protein hits of one ProteinIdentification with quantitative
  // meta values derived from the consensus map.
  //
  // For every protein hit:
  //  1. each consensus feature is searched for a peptide hit referencing the
  //     protein (only peptide identifications with the same identifier as the
  //     protein identification are considered, and only hits that
  //     sortByUnique_ accepts);
  //  2. for each such feature that contains a handle of the reference map, the
  //     intensity of every handle is divided by the reference intensity;
  //  3. per map, the ratios are sorted and the element at index size/2 is
  //     stored as meta value "ratio_<map index>";
  //  4. "coverage" (percent, summed peptide lengths vs. protein length) and
  //     "hits" (number of features with an assigned peptide) are stored.
  //
  // @param consensus_map              map providing features and identifications
  // @param protein_idenfication_index index of the ProteinIdentification to annotate
  // @param reference_map              map index whose handle is the ratio denominator
  void ProteinInference::infer_(ConsensusMap & consensus_map,
                                const size_t protein_idenfication_index,
                                const UInt reference_map)
  {
    ProteinIdentification & protein_ident = consensus_map.getProteinIdentifications()[protein_idenfication_index];

    for (size_t prot_idx = 0; prot_idx < protein_ident.getHits().size(); ++prot_idx)
    {
      // accession of the protein handled in this iteration
      String accession = protein_ident.getHits()[prot_idx].getAccession();

      // index of consensus feature -> peptide hit chosen for it
      Map<size_t, PeptideHit> feature_to_hit;

      // scan all consensus features for peptides referencing the protein
      for (size_t feat_idx = 0; feat_idx < consensus_map.size(); ++feat_idx)
      {
        std::vector<PeptideIdentification> & pep_ids = consensus_map[feat_idx].getPeptideIdentifications();
        std::vector<PeptideHit> candidates;

        for (std::vector<PeptideIdentification>::iterator pep_id = pep_ids.begin();
             pep_id != pep_ids.end();
             ++pep_id)
        {
          // protein and peptide identification must stem from the same search engine run
          if (pep_id->getIdentifier() == protein_ident.getIdentifier())
          {
            std::vector<PeptideHit> referencing_hits;
            pep_id->getReferencingHits(accession, referencing_hits);

            // keep the front hit only if sortByUnique_ reports a unique peptide
            if (!referencing_hits.empty()
                && sortByUnique_(referencing_hits, pep_id->isHigherScoreBetter()))
            {
              candidates.push_back(referencing_hits[0]);
            }
          }
        }

        // nothing usable in this consensus feature
        if (candidates.empty())
        {
          continue;
        }

        // several peptide identifications (== spectra) may belong to one
        // consensus feature (e.g. in SILAC) --> take the best one, as above
        // TODO: better idea?
        if (sortByUnique_(candidates, pep_ids[0].isHigherScoreBetter()))
        {
          feature_to_hit[feat_idx] = candidates[0];
#ifdef DEBUG_INFERENCE
          std::cout << "assign peptide " <<  candidates[0].getSequence() << " to Protein " << accession << std::endl;
#endif
        }
      }       // ! consensus feature loop

      // no peptide matches the current protein
      if (feature_to_hit.empty())
      {
        continue;
      }

      // accumulated length of the assigned peptides (converted to percent below)
      double coverage = 0;
      // map index -> intensity ratios (map vs. reference) over all matching features
      Map<Size, std::vector<IntensityType> > ratios;

      // number of consensus features with a peptide pointing to the current protein
      UInt coverage_count = static_cast<UInt>(feature_to_hit.size());

      for (Map<size_t, PeptideHit>::iterator hit_it = feature_to_hit.begin();
           hit_it != feature_to_hit.end();
           ++hit_it)
      {
        coverage += hit_it->second.getSequence().size();
        const ConsensusFeature::HandleSetType & handles = consensus_map[hit_it->first].getFeatures();

        // advance to the first handle of the reference map (or to end() if there is none)
        ConsensusFeature::HandleSetType::const_iterator ref_handle = handles.begin();
        while (ref_handle != handles.end() && ref_handle->getMapIndex() != reference_map)
        {
          ++ref_handle;
        }

        // without a reference handle no ratio can be formed for this feature
        // TODO assume intensity==0 instead??
        if (ref_handle != handles.end())
        {
          for (ConsensusFeature::HandleSetType::const_iterator handle = handles.begin();
               handle != handles.end();
               ++handle)
          {
            ratios[handle->getMapIndex()].push_back(handle->getIntensity() / ref_handle->getIntensity());
          }
        }
      }

      // per map: sort the collected ratios and pick the middle element
      for (ConsensusMap::FileDescriptions::const_iterator it_file = consensus_map.getFileDescriptions().begin();
           it_file != consensus_map.getFileDescriptions().end();
           ++it_file)
      {
        if (!ratios.has(it_file->first))
        {
          continue;
        }

        std::vector<IntensityType> & map_ratios = ratios[it_file->first];
        std::sort(map_ratios.begin(), map_ratios.end());
        // element at size/2 (the upper of the two middle values for even sizes)
        IntensityType protein_ratio = map_ratios[map_ratios.size() / 2];

        //TODO if ratios have high variance emit a warning!

        protein_ident.getHits()[prot_idx].setMetaValue(String("ratio_") + String(it_file->first), protein_ratio);
      }       // ! map loop

      // % coverage of protein by peptides
      coverage /= DoubleReal(protein_ident.getHits()[prot_idx].getSequence().size()) / 100;

      protein_ident.getHits()[prot_idx].setMetaValue("coverage", coverage);
      protein_ident.getHits()[prot_idx].setMetaValue("hits", coverage_count);

    }     // ! protein loop
  }
	// hand the parameter object to the grouping algorithm under test
	fga.setParameters(p);


	//test exception (no file name set in out)
	TEST_EXCEPTION(Exception::IllegalArgument, fga.group(in,out));

	// fill in column headers 5 ("light") and 8 ("heavy", copied from 5 and relabeled)
	// so that grouping no longer throws
	out.getColumnHeaders()[5].label = "light";
	out.getColumnHeaders()[5].filename = "filename";
	out.getColumnHeaders()[8] = out.getColumnHeaders()[5];
	out.getColumnHeaders()[8].label = "heavy";
	fga.group(in,out);

	// exactly one consensus feature with two handles is expected
	TEST_EQUAL(out.size(),1)
	TEST_REAL_SIMILAR(out[0].getQuality(),0.959346);
	TEST_EQUAL(out[0].size(),2)
	// positions of the first and the second handle
	ConsensusFeature::HandleSetType::const_iterator it = out[0].begin();
	TEST_REAL_SIMILAR(it->getMZ(),1.0f);
	TEST_REAL_SIMILAR(it->getRT(),1.0f);
	++it;
	TEST_REAL_SIMILAR(it->getMZ(),5.0f);
	TEST_REAL_SIMILAR(it->getRT(),1.5f);
END_SECTION


/////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////
END_TEST



Ejemplo n.º 8
0
  // Writes the content of a consensus map as a tab-separated text file.
  //
  // The first line is the header built by constructHeader_ for the guessed
  // quantitation method. Each following line consists of the identification
  // fields of an IdCSV entry, the centers of all channels of the quantitation
  // method and the intensity found for each channel.
  //
  // Features without a usable identification (no peptide identification, no
  // hits in the first peptide identification, or no protein identification in
  // the map) are written with a default-constructed IdCSV entry. Identified
  // features yield one line per protein accession of their first peptide hit.
  // Features with an intensity of 0 are skipped.
  //
  // @param filename Path handed to TextFile::store.
  // @param cm       Consensus map to export.
  void IBSpectraFile::store(const String& filename, const ConsensusMap& cm)
  {
    // typedefs for shorter code
    typedef std::vector<ProteinHit>::iterator ProtHitIt;

    // general settings .. do we need to expose these?
    // ----------------------------------------------------------------------
    /// Allow also non-unique peptides to be exported
    bool allow_non_unique = true;
    /// Intensities below this value will be set to 0.0 to avoid numerical problems when quantifying
    double intensity_threshold = 0.00001;
    // ----------------------------------------------------------------------


    // guess experiment type
    boost::shared_ptr<IsobaricQuantitationMethod> quantMethod = guessExperimentType_(cm);

    // we need the protein identifications to reference the protein names
    ProteinIdentification protIdent;
    bool has_proteinIdentifications = false;
    if (cm.getProteinIdentifications().size() > 0)
    {
      protIdent = cm.getProteinIdentifications()[0];
      has_proteinIdentifications = true;
    }

    // start the file by adding the tsv header
    TextFile textFile;
    textFile.addLine(ListUtils::concatenate(constructHeader_(*quantMethod), "\t"));

    for (ConsensusMap::ConstIterator cm_iter = cm.begin();
         cm_iter != cm.end();
         ++cm_iter)
    {
      const ConsensusFeature& cFeature = *cm_iter;
      std::vector<IdCSV> entries;

      /// 1st we extract the identification information from the consensus feature
      // A peptide identification without hits is treated like a missing one;
      // indexing getHits()[0] on it would be undefined behaviour.
      if (cFeature.getPeptideIdentifications().size() == 0
          || !has_proteinIdentifications
          || cFeature.getPeptideIdentifications()[0].getHits().empty())
      {
        // we store unidentified hits anyway, because the iTRAQ quant is still helpful for normalization
        entries.push_back(IdCSV());
      }
      else
      {
        // protein name:
        const PeptideHit& peptide_hit = cFeature.getPeptideIdentifications()[0].getHits()[0];
        std::set<String> protein_accessions = peptide_hit.extractProteinAccessions();
        if (protein_accessions.size() != 1)
        {
          if (!allow_non_unique) continue; // we only want unique peptides
        }

        // one entry per protein accession referenced by the peptide hit
        for (std::set<String>::const_iterator prot_ac = protein_accessions.begin(); prot_ac != protein_accessions.end(); ++prot_ac)
        {
          IdCSV entry;
          entry.charge = peptide_hit.getCharge();
          entry.peptide = peptide_hit.getSequence().toUnmodifiedString();
          entry.theo_mass = peptide_hit.getSequence().getMonoWeight(Residue::Full, peptide_hit.getCharge());

          // write modif
          entry.modif = getModifString_(peptide_hit.getSequence());

          ProtHitIt proteinHit = protIdent.findHit(*prot_ac);
          if (proteinHit == protIdent.getHits().end())
          {
            std::cerr << "Protein referenced in peptide not found...\n";
            continue; // protein not found
          }

          entry.accession = proteinHit->getAccession();
          entries.push_back(entry);
        }
      }

      // 2nd we add the quantitative information of the channels

      // .. skip features with 0 intensity
      if (cFeature.getIntensity() == 0)
      {
        continue;
      }

      // extract channel intensities and positions; they are the same for every
      // entry of this feature, so they are collected once (keyed by the
      // integer part of the handle's m/z) without copying the handle set
      std::map<Int, double> intensityMap;
      const ConsensusFeature::HandleSetType& features = cFeature.getFeatures();

      for (ConsensusFeature::HandleSetType::const_iterator fIt = features.begin();
           fIt != features.end();
           ++fIt)
      {
        intensityMap[Int(fIt->getMZ())] = (fIt->getIntensity() > intensity_threshold ? fIt->getIntensity() : 0.0);
      }

      for (std::vector<IdCSV>::iterator entry = entries.begin();
           entry != entries.end();
           ++entry)
      {
        // set parent intensity
        entry->parent_intens = cFeature.getIntensity();
        entry->retention_time = cFeature.getRT();
        entry->spectrum = cFeature.getUniqueId();
        entry->exp_mass = cFeature.getMZ();

        // create output line
        StringList currentLine;

        // add entry to currentLine
        entry->toStringList(currentLine);

        // channel centers first ...
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin();
             it != quantMethod->getChannelInformation().end();
             ++it)
        {
          currentLine.push_back(String(it->center));
        }
        // ... then the intensity per channel (channels without a handle yield 0)
        for (IsobaricQuantitationMethod::IsobaricChannelList::const_iterator it = quantMethod->getChannelInformation().begin();
             it != quantMethod->getChannelInformation().end();
             ++it)
        {
          currentLine.push_back(String(intensityMap[int(it->center)]));
        }

        textFile.addLine(ListUtils::concatenate(currentLine, "\t"));
      }
    }

    // write to file
    textFile.store(filename);
  }
Ejemplo n.º 9
0
//features
// the map is expected to hold six consensus features; the first and the last are checked in detail
TEST_EQUAL(map.size(), 6)
ConsensusFeature cons_feature = map[0];
TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27)
TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47)
TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07)
// min and max of each range are expected to coincide with the feature's own position / intensity
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[1], 904.47)
TEST_REAL_SIMILAR(cons_feature.getIntensityRange().minPosition()[0], 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getIntensityRange().maxPosition()[0], 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getQuality(), 1.1)
TEST_EQUAL(cons_feature.getMetaValue("peptide_id") == DataValue("RefSeq:NC_1234"), true)
// the first handle is expected to carry the same intensity as the consensus feature
ConsensusFeature::HandleSetType::const_iterator it = cons_feature.begin();
TEST_REAL_SIMILAR(it->getIntensity(), 3.12539e+07)

// same checks for the last consensus feature (index 5)
cons_feature = map[5];
TEST_REAL_SIMILAR(cons_feature.getRT(), 1194.82)
TEST_REAL_SIMILAR(cons_feature.getMZ(), 777.101)
TEST_REAL_SIMILAR(cons_feature.getIntensity(), 1.78215e+07)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1194.82)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1194.82)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 777.101)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[1], 777.101)
TEST_REAL_SIMILAR(cons_feature.getIntensityRange().minPosition()[0], 1.78215e+07)
TEST_REAL_SIMILAR(cons_feature.getIntensityRange().maxPosition()[0], 1.78215e+07)
TEST_REAL_SIMILAR(cons_feature.getQuality(), 0.0)
it = cons_feature.begin();
TEST_REAL_SIMILAR(it->getIntensity(), 1.78215e+07)