Exemplo n.º 1
0
  //TODO include run information for each peptide
  /**
    @brief Includes all MS/MS derived peptides of a consensus map into the graph (consensusXML input).

    For every peptide identification attached to a consensus feature, the
    unmodified sequence of its first hit is looked up in @p peptide_nodes.
    Matching nodes are flagged as experimental and annotated with the index
    of the consensus feature, the index of the identification within that
    feature, the feature intensity and the "file_origin" meta value.

    Identifications without any hit are skipped.

    @param consensus Consensus map whose features carry the peptide identifications
    @param peptide_nodes Peptide nodes of the graph; matching entries are updated in place
    @return Number of peptide nodes that were newly flagged as experimental
  */
  Size ProteinResolver::includeMSMSPeptides_(ConsensusMap & consensus, vector<PeptideEntry> & peptide_nodes)
  {
    Size found_peptide = 0;
    for (Size pep = 0; pep != consensus.size(); ++pep)
    {
      ConsensusFeature & feature = consensus.at(pep);

      // get all peptide identifications
      const vector<PeptideIdentification> & pep_id = feature.getPeptideIdentifications();

      for (Size cons_pep = 0; cons_pep < pep_id.size(); ++cons_pep)
      {
        // calling front() on an empty hit list is undefined behaviour, so
        // identifications without hits are ignored
        if (pep_id.at(cons_pep).getHits().empty())
        {
          continue;
        }

        String seq = pep_id.at(cons_pep).getHits().front().getSequence().toUnmodifiedString();
        Size peptide_entry = findPeptideEntry_(seq, peptide_nodes);

        // an index equal to peptide_nodes.size() is treated as "not found"
        if (peptide_entry != peptide_nodes.size())
        {
          // count each node only the first time it becomes experimental
          if (!peptide_nodes.at(peptide_entry).experimental)
          {
            ++found_peptide;
          }
          //should be changed -- for consensus peptide_identification is the consensus and peptide_hit is the PeptideIdentification. PeptideHit is only top hit at the moment
          peptide_nodes.at(peptide_entry).peptide_identification = pep;
          peptide_nodes.at(peptide_entry).peptide_hit = cons_pep; //only top hit is used at the moment
          peptide_nodes.at(peptide_entry).experimental = true;
          // get intensity of the feature
          peptide_nodes.at(peptide_entry).intensity = feature.getIntensity();
          peptide_nodes.at(peptide_entry).origin = feature.getMetaValue("file_origin");
        }
      }
    }
    return found_peptide;
  }
Exemplo n.º 2
0
  void EDTAFile::store(const String& filename, const ConsensusMap& map) const
  {
    TextFile tf;

    // search for maximum number of sub-features (since this determines the number of columns)
    Size max_sub(0);
    for (Size i = 0; i < map.size(); ++i)
    {
      max_sub = std::max(max_sub, map[i].getFeatures().size());
    }

    // write header
    String header("RT\tm/z\tintensity\tcharge");
    for (Size i = 1; i <= max_sub; ++i)
    {
      header += "\tRT" + String(i) + "\tm/z" + String(i) + "\tintensity" + String(i) + "\tcharge" + String(i);
    }
    tf.addLine(header);

    for (Size i = 0; i < map.size(); ++i)
    {
      ConsensusFeature f = map[i];
      // consensus
      String entry = String(f.getRT()) + "\t" + f.getMZ() + "\t" + f.getIntensity() + "\t" + f.getCharge();
      // sub-features
      ConsensusFeature::HandleSetType handle = f.getFeatures();
      for (ConsensusFeature::HandleSetType::const_iterator it = handle.begin(); it != handle.end(); ++it)
      {
        entry += String("\t") + it->getRT() + "\t" + it->getMZ() + "\t" + it->getIntensity() + "\t" + it->getCharge();
      }
      // missing sub-features
      for (Size j = handle.size(); j < max_sub; ++j)
      {
        entry += "\tNA\tNA\tNA\tNA";
      }
      tf.addLine(entry);
    }


    tf.store(filename);
  }
  /**
    @brief Collects labeling statistics for a quantified consensus map.

    Counts the consensus features whose overall intensity is zero and, per
    channel name, the handles whose intensity is zero. The counts are stored
    in stats_, written to the log and attached to the map as "isoquant:*"
    meta values.

    @param consensus_map_out Consensus map to analyze; receives the meta values
  */
  void IsobaricQuantifier::computeLabelingStatistics_(ConsensusMap& consensus_map_out)
  {
    // total number of quantified spectra
    stats_.number_ms2_total = consensus_map_out.size();

    // gather the labeling efficiency counters
    for (size_t scan = 0; scan != consensus_map_out.size(); ++scan)
    {
      // a consensus intensity of zero means the whole scan carries no signal
      if (consensus_map_out[scan].getIntensity() == 0)
      {
        ++stats_.number_ms2_empty;
      }

      // inspect each reporter handle on its own
      ConsensusFeature::HandleSetType::const_iterator reporter = consensus_map_out[scan].begin();
      while (reporter != consensus_map_out[scan].end())
      {
        if (reporter->getIntensity() == 0)
        {
          String ch_index = consensus_map_out.getFileDescriptions()[reporter->getMapIndex()].getMetaValue("channel_name");
          ++stats_.empty_channels[ch_index];
        }
        ++reporter;
      }
    }

    LOG_INFO << "IsobaricQuantifier: skipped "
             << stats_.number_ms2_empty
             << " of "
             << consensus_map_out.size()
             << " selected scans due to lack of reporter information:\n";
    consensus_map_out.setMetaValue("isoquant:scans_noquant", stats_.number_ms2_empty);
    consensus_map_out.setMetaValue("isoquant:scans_total", consensus_map_out.size());

    LOG_INFO << "IsobaricQuantifier: channels with signal\n";
    // only channels with at least one empty reporter have an entry in empty_channels
    std::map<String, Size>::const_iterator channel = stats_.empty_channels.begin();
    while (channel != stats_.empty_channels.end())
    {
      LOG_INFO << "      channel " << channel->first << ": "
               << (consensus_map_out.size() - channel->second)
               << " / "
               <<  consensus_map_out.size()
               << " ("
               << ((consensus_map_out.size() - channel->second) * 100 / consensus_map_out.size())
               << "%)\n";
      consensus_map_out.setMetaValue(String("isoquant:quantifyable_ch") + channel->first, (consensus_map_out.size() - channel->second));
      ++channel;
    }
  }
 /**
   @brief Scales the intensity of every feature handle by the ratio of its map.

   @param map Consensus map whose handles are rescaled in place
   @param ratios Scaling factors, indexed by the map index of the handle
 */
 void ConsensusMapNormalizerAlgorithmThreshold::normalizeMaps(ConsensusMap& map, const vector<double>& ratios)
 {
   ProgressLogger progresslogger;
   progresslogger.setLogType(ProgressLogger::CMD);
   progresslogger.startProgress(0, map.size(), "normalizing maps");

   ConsensusMap::Iterator consensus_it = map.begin();
   while (consensus_it != map.end())
   {
     // progress is the position of the current consensus feature
     progresslogger.setProgress(consensus_it - map.begin());

     for (ConsensusFeature::HandleSetType::const_iterator handle_it = consensus_it->getFeatures().begin();
          handle_it != consensus_it->getFeatures().end();
          ++handle_it)
     {
       // the handle set only hands out const iterators; asMutable() gives write access
       const double scaled_intensity = handle_it->getIntensity() * ratios[handle_it->getMapIndex()];
       handle_it->asMutable().setIntensity(scaled_intensity);
     }
     ++consensus_it;
   }

   progresslogger.endProgress();
 }
  /**
    @brief Registers every channel of the quantitation method as a file description of the output map.

    Channels are stored under consecutive indices starting at 0, in the
    order in which the quantitation method lists them.

    @param consensus_map Output consensus map that receives the descriptions
  */
  void IsobaricChannelExtractor::registerChannelsInOutputMap_(ConsensusMap& consensus_map)
  {
    typedef IsobaricQuantitationMethod::IsobaricChannelList::const_iterator ChannelIterator;

    Int next_index = 0;
    ChannelIterator channel = quant_method_->getChannelInformation().begin();
    while (channel != quant_method_->getChannelInformation().end())
    {
      ConsensusMap::FileDescription description;

      // the label combines the method name with the channel name
      description.label = quant_method_->getName() + "_" + channel->name;

      // TODO(aiche): number of features need to be set later
      description.size = consensus_map.size();

      // attach the remaining channel properties as meta values
      description.setMetaValue("channel_name", channel->name);
      description.setMetaValue("channel_id", channel->id);
      description.setMetaValue("channel_description", channel->description);
      description.setMetaValue("channel_center", channel->center);

      consensus_map.getFileDescriptions()[next_index] = description;
      ++next_index;
      ++channel;
    }
  }
  feat6.setPosition(pos6);
  feat6.setIntensity(400.0f);
  feat6.setUniqueId(2);
  ConsensusFeature cons4(1,feat4);
  ConsensusFeature cons5(1,feat5);
  ConsensusFeature cons6(1,feat6);
  input[1].push_back(cons4);
  input[1].push_back(cons5);
  input[1].push_back(cons6);

  StablePairFinder spf;
	Param param = spf.getDefaults();
	spf.setParameters(param);
	ConsensusMap result;
	spf.run(input,result);
	TEST_EQUAL(result.size(),3);
	ABORT_IF(result.size()!=3);

  ConsensusFeature::HandleSetType group1 = result[0].getFeatures();
  ConsensusFeature::HandleSetType group2 = result[1].getFeatures();
  ConsensusFeature::HandleSetType group3 = result[2].getFeatures();

  FeatureHandle ind1(0,feat1);
  FeatureHandle ind2(0,feat2);
  FeatureHandle ind3(0,feat3);
  FeatureHandle ind4(1,feat4);
  FeatureHandle ind5(1,feat5);
  FeatureHandle ind6(1,feat6);

  ConsensusFeature::HandleSetType::const_iterator it;
	it = group1.begin();
  /**
    @brief Computes one normalization ratio per map relative to the map with the most features.

    Intensities of all consensus features that pass the accession and
    description filters are collected per map. For every map, the ratios
    (reference intensity / map intensity) that lie strictly between
    @p ratio_threshold and 1 / @p ratio_threshold are averaged.

    Map indices without a file description are skipped when the reference
    map is chosen, and handles whose map index is not smaller than the
    number of file descriptions are ignored.

    @param map Consensus map to analyze
    @param ratio_threshold Lower bound for accepted ratios; the upper bound is its reciprocal
    @param acc_filter Accession filter forwarded to passesFilters_
    @param desc_filter Description filter forwarded to passesFilters_
    @return One ratio per file description; all 1.0 if any map has no accepted ratio
  */
  vector<double> ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(const ConsensusMap& map, const double& ratio_threshold, const String& acc_filter, const String& desc_filter)
  {
    const Size number_of_features = map.size();
    const ConsensusMap::FileDescriptions& descriptions = map.getFileDescriptions();
    const Size number_of_maps = descriptions.size();
    vector<vector<double> > feature_int(number_of_maps);

    //get map with most features, resize feature_int
    UInt map_with_most_features_idx = 0;
    // end() marks "no reference map found yet"
    ConsensusMap::FileDescriptions::const_iterator map_with_most_features = descriptions.end();
    for (UInt i = 0; i < number_of_maps; i++)
    {
      feature_int[i].resize(number_of_features);
      ConsensusMap::FileDescriptions::const_iterator it = descriptions.find(i);
      if (it == descriptions.end())
      {
        // map indices need not be contiguous; never dereference a failed lookup
        continue;
      }
      if (map_with_most_features == descriptions.end() || it->second.size > map_with_most_features->second.size)
      {
        map_with_most_features = it;
        map_with_most_features_idx = i;
      }
    }

    //fill feature_int with intensities
    Size pass_counter = 0;
    ConsensusMap::ConstIterator cf_it;
    UInt idx = 0;
    for (cf_it = map.begin(); cf_it != map.end(); ++cf_it, ++idx)
    {
      if (!ConsensusMapNormalizerAlgorithmMedian::passesFilters_(cf_it, map, acc_filter, desc_filter))
      {
        continue;
      }
      ++pass_counter;

      ConsensusFeature::HandleSetType::const_iterator f_it;
      for (f_it = cf_it->getFeatures().begin(); f_it != cf_it->getFeatures().end(); ++f_it)
      {
        // a handle with a map index outside the table would write out of bounds
        if (f_it->getMapIndex() >= number_of_maps)
        {
          continue;
        }
        feature_int[f_it->getMapIndex()][idx] = f_it->getIntensity();
      }
    }

    LOG_INFO << endl << "Using " << pass_counter << "/" << map.size() <<  " consensus features for computing normalization coefficients" << endl << endl;

    //determine ratio
    vector<double> ratio_vector(number_of_maps);
    for (UInt j = 0; j < number_of_maps; j++)
    {
      vector<double> ratios;
      for (UInt k = 0; k < number_of_features; ++k)
      {
        // an intensity of 0.0 means the map has no (accepted) feature in this consensus feature
        if (feature_int[map_with_most_features_idx][k] != 0.0 && feature_int[j][k] != 0.0)
        {
          double ratio = feature_int[map_with_most_features_idx][k] / feature_int[j][k];
          if (ratio > ratio_threshold && ratio < 1 / ratio_threshold)
          {
            ratios.push_back(ratio);
          }
        }
      }
      if (ratios.empty())
      {
        LOG_WARN << endl << "Not enough features passing filters. Cannot compute normalization coefficients for all maps. Result will be unnormalized." << endl << endl;
        return vector<double>(number_of_maps, 1.0);
      }
      ratio_vector[j] = Math::mean(ratios.begin(), ratios.end());
    }
    return ratio_vector;
  }
Exemplo n.º 8
0
  /**
    @brief Converts the input file into the requested output format.

    Reads the options "in", "in_type", "out", "out_type" and the flag
    "TIC_DTA2D". Depending on the input type the data is loaded into a
    consensus map, a feature map or a peak experiment, and converted to the
    container the output format needs before it is stored.

    @return PARSE_ERROR if the input or output type cannot be determined,
            INTERNAL_ERROR if both the feature map and the consensus map hold
            data when writing EDTA, ILLEGAL_PARAMETERS for an output type
            that is not handled, EXECUTION_OK otherwise
  */
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input file names
    String in = getStringOption_("in");

    //input file type
    FileHandler fh;
    FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

    // no usable type given explicitly: let the file handler determine it
    if (in_type == FileTypes::UNKNOWN)
    {
      in_type = fh.getType(in);
      writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
    }

    if (in_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine input file type!");
      return PARSE_ERROR;
    }


    //output file names and types
    String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

    // the output type is derived from the file name only
    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }

    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    bool TIC_DTA2D = getFlag_("TIC_DTA2D");

    writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 1);

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    typedef MSExperiment<Peak1D> MSExperimentType;
    MSExperimentType exp;

    typedef MSExperimentType::SpectrumType SpectrumType;

    typedef FeatureMap<> FeatureMapType;

    // only one of exp / fm / cm is loaded from the input; the others are
    // filled by conversion where the output format requires it
    FeatureMapType fm;
    ConsensusMap cm;

    writeDebug_(String("Loading input file"), 1);

    if (in_type == FileTypes::CONSENSUSXML)
    {
      ConsensusXMLFile().load(in, cm);
      cm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::EDTA)
    {
      EDTAFile().load(in, cm);
      cm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::FEATUREXML ||
             in_type == FileTypes::TSV ||
             in_type == FileTypes::PEPLIST ||
             in_type == FileTypes::KROENIK)
    {
      fh.loadFeatures(in, fm, in_type);
      fm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting features to peaks. You will lose information! Mass traces are added, if present as 'num_of_masstraces' and 'masstrace_intensity_<X>' (X>=0) meta values.");
        exp.set2DData<true>(fm);
      }
    }
    else
    {
      // every other input type is loaded as a peak experiment
      fh.loadExperiment(in, exp, in_type, log_type_);
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    writeDebug_(String("Writing output file"), 1);

    if (out_type == FileTypes::MZML)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZML));
      MzMLFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertSpectraToChromatograms(exp, true);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZDATA)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZDATA));
      MzDataFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZXML)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZXML));
      MzXMLFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::DTA2D)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      DTA2DFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      if (TIC_DTA2D)
      {
        // store the total ion chromatogram (TIC)
        f.storeTIC(out, exp);
      }
      else
      {
        // store entire experiment
        f.store(out, exp);
      }


    }
    else if (out_type == FileTypes::MGF)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      MascotGenericFile f;
      f.setLogType(log_type_);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::FEATUREXML)
    {
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        // input was already loaded as a feature map; only assign unique ids
        fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
      }
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
        ConsensusMap::convert(cm, true, fm);
      }
      else // not loaded as feature map or consensus map
      {
        // The feature specific information is only defaulted. Enough reasons to issue a warning!
        writeLog_("Warning: Converting peaks to features will lead to incomplete features!");
        fm.clear();
        fm.reserve(exp.getSize());
        typedef FeatureMapType::FeatureType FeatureType;
        // one template feature is reused; only RT, m/z, intensity and the
        // unique id change before each push_back
        FeatureType feature;
        feature.setQuality(0, 1); // override default
        feature.setQuality(1, 1); // override default
        feature.setOverallQuality(1); // override default
        for (MSExperimentType::ConstIterator spec_iter = exp.begin();
             spec_iter != exp.end();
             ++spec_iter
             )
        {
          feature.setRT(spec_iter->getRT());
          for (SpectrumType::ConstIterator peak1_iter = spec_iter->begin();
               peak1_iter != spec_iter->end();
               ++peak1_iter
               )
          {
            feature.setMZ(peak1_iter->getMZ());
            feature.setIntensity(peak1_iter->getIntensity());
            feature.setUniqueId();
            fm.push_back(feature);
          }
        }
        fm.updateRanges();
      }

      addDataProcessing_(fm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      FeatureXMLFile().store(out, fm);
    }
    else if (out_type == FileTypes::CONSENSUSXML)
    {
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
        ConsensusMap::convert(0, fm, cm);
      }
      // nothing to do for consensus input
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
      }
      else // experimental data
      {
        ConsensusMap::convert(0, exp, cm, exp.size());
      }

      addDataProcessing_(cm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      ConsensusXMLFile().store(out, cm);
    }
    else if (out_type == FileTypes::EDTA)
    {
      // at most one of the two containers may hold data at this point
      if (fm.size() > 0 && cm.size() > 0)
      {
        LOG_ERROR << "Internal error: cannot decide on container (Consensus or Feature)! This is a bug. Please report it!";
        return INTERNAL_ERROR;
      }
      // NOTE(review): if both containers are empty (e.g. peak input or an
      // empty feature file) no file is written, yet EXECUTION_OK is returned
      // below -- confirm whether this is intended
      if (fm.size() > 0) EDTAFile().store(out, fm);
      else if (cm.size() > 0) EDTAFile().store(out, cm);
    }
    else
    {
      writeLog_("Unknown output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    return EXECUTION_OK;
  }
/**
  @brief Computes the precision of a tool's consensus map against a ground truth.

  Precision = 1/N * sum ( gt_subtend_tilde_tool_i / tilde_tool_i ), where N
  is the number of ground truth consensus features with at least two
  elements. For the i-th of them, the numerator is the number of its handles
  that are matched by tool consensus features of size >= 2, and the
  denominator is the summed size of those tool consensus features.

  If no ground truth consensus feature has at least two elements, the
  precision is reported as 0 (instead of the NaN a division by N = 0 would
  yield).

  @param consensus_map_in Consensus map produced by the tool
  @param consensus_map_gt Ground truth consensus map
  @param rt_dev RT tolerance forwarded to isSameHandle
  @param mz_dev m/z tolerance forwarded to isSameHandle
  @param int_dev Intensity tolerance forwarded to isSameHandle
  @param use_charge Charge flag forwarded to isSameHandle
  @param out Receives the computed precision
*/
void MapAlignmentEvaluationAlgorithmPrecision::evaluate(const ConsensusMap & consensus_map_in, const ConsensusMap & consensus_map_gt, const double & rt_dev, const double & mz_dev, const Peak2D::IntensityType & int_dev, const bool use_charge, double & out)
{
    //Precision = 1/N * sum ( gt_subtend_tilde_tool_i / tilde_tool_i )

    // only ground truth consensus features with at least two elements count
    ConsensusMap cons_map_gt;     /* = consensus_map_gt; */

    for (Size i = 0; i < consensus_map_gt.size(); ++i)
    {
        if (consensus_map_gt[i].size() >= 2)
        {
            cons_map_gt.push_back(consensus_map_gt[i]);
        }
    }

    // N == 0: the formula is undefined; report 0 rather than NaN
    if (cons_map_gt.size() == 0)
    {
        out = 0.0;
        return;
    }

    ConsensusMap cons_map_tool = consensus_map_in;

    double sum = 0;         //intermediate step: the sum

    //loop over all consensus features of the ground truth
    for (Size i = 0; i < cons_map_gt.size(); ++i)      //N = cons_map_gt.size()
    {
        ConsensusFeature & gt_elem = cons_map_gt[i];

        //for every i = 1, ..., N:
        Size gt_subtend_tilde_tool_i = 0;       //numerator of the i-th summand
        Size tilde_tool_i = 0;                  //denominator of the i-th summand

        //loop over all consensus features of the tool's consensus map
        for (Size j = 0; j < cons_map_tool.size(); ++j)
        {
            ConsensusFeature & tool_elem = cons_map_tool[j];
            const Size cons_tool_size = tool_elem.size();   //size of the actual consensus feature of the tool

            //size of the intersection of the actual cons. feat. of the tool with the c.f. of GT
            Size gt_i_subtend_tool_j = 0;

            //loop over all features in the ith consensus feature of the gt
            for (HandleIterator gt_it = gt_elem.begin(); gt_it != gt_elem.end(); ++gt_it)
            {
                //loop over all features in the jth consensus feature of the tool's map
                for (HandleIterator tool_it = tool_elem.begin(); tool_it != tool_elem.end(); ++tool_it)
                {
                    if (isSameHandle(*tool_it, *gt_it, rt_dev, mz_dev, int_dev, use_charge))
                    {
                        // count each ground truth handle at most once per tool feature
                        ++gt_i_subtend_tool_j;
                        break;
                    }
                }
            }

            if ((cons_tool_size >= 2) && (gt_i_subtend_tool_j > 0))
            {
                gt_subtend_tilde_tool_i += gt_i_subtend_tool_j;
                tilde_tool_i += cons_tool_size;
            }
        }

        // a non-zero numerator implies a non-zero denominator, so the
        // division is safe; a zero numerator contributes nothing
        if (gt_subtend_tilde_tool_i != 0)
        {
            sum += double(gt_subtend_tilde_tool_i) / double(tilde_tool_i);
        }
    }

    out = (1.0 / double(cons_map_gt.size())) * sum;
}
Exemplo n.º 10
0
	p.setValue("mz_pair_dists",ListUtils::create<double>(4.0));
	p.setValue("mz_dev",0.6);
	pm.setParameters(p);

	ConsensusMap output;
	TEST_EXCEPTION(Exception::IllegalArgument,pm.run(vector<ConsensusMap>(),output));
	vector<ConsensusMap> input(1);
	MapConversion::convert(5,features,input[0]);
	output.getColumnHeaders()[5].label = "light";
	output.getColumnHeaders()[5].filename = "filename";
	output.getColumnHeaders()[8] = output.getColumnHeaders()[5];
	output.getColumnHeaders()[8].label = "heavy";

	pm.run(input,output);

	TEST_EQUAL(output.size(),1);
	ABORT_IF(output.size()!=1)
	TEST_REAL_SIMILAR(output[0].begin()->getMZ(),1.0f);
	TEST_REAL_SIMILAR(output[0].begin()->getRT(),1.0f);
	TEST_REAL_SIMILAR(output[0].rbegin()->getMZ(),5.0f);
	TEST_REAL_SIMILAR(output[0].rbegin()->getRT(),1.5f);
	TEST_REAL_SIMILAR(output[0].getQuality(),0.959346);
	TEST_EQUAL(output[0].getCharge(),1);

	//test automated RT parameter estimation
	LabeledPairFinder pm2;
	Param p2;
	p2.setValue("rt_estimate","true");
	p2.setValue("mz_pair_dists", ListUtils::create<double>(4.0));
	p2.setValue("mz_dev",0.2);
	pm2.setParameters(p2);
Exemplo n.º 11
0
    TEST_EQUAL(cm_out.getFileDescriptions()[1].getMetaValue("channel_center"), 115.1082)

    TEST_EQUAL(cm_out.getFileDescriptions()[2].label, "itraq4plex_116")
    TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_name"), 116)
    TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_id"), 2)
    TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_description"), "else")
    TEST_EQUAL(cm_out.getFileDescriptions()[2].getMetaValue("channel_center"), 116.1116)

    TEST_EQUAL(cm_out.getFileDescriptions()[3].label, "itraq4plex_117")
    TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_name"), 117)
    TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_id"), 3)
    TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_description"), "")
    TEST_EQUAL(cm_out.getFileDescriptions()[3].getMetaValue("channel_center"), 117.1149)

    // compare results
    TEST_EQUAL(cm_out.size(), 5)
    ABORT_IF(cm_out.size() != 5)
    ConsensusFeature::iterator cf_it;

    TEST_EQUAL(cm_out[0].size(), 4)
    TEST_EQUAL(cm_out[0].getMetaValue("scan_id"), "controllerType=0 controllerNumber=1 scan=2")
    TEST_REAL_SIMILAR(cm_out[0].getMetaValue("precursor_intensity"), 5251952.5)
    TEST_REAL_SIMILAR(cm_out[0].getMetaValue("precursor_charge"), 2)
    TEST_REAL_SIMILAR(cm_out[0].getIntensity(), 1490501.21)
    cf_it = cm_out[0].begin();
    TEST_REAL_SIMILAR(cf_it->getIntensity(), 643005.56)
    ++cf_it;
    TEST_REAL_SIMILAR(cf_it->getIntensity(), 458708.97)
    ++cf_it;
    TEST_REAL_SIMILAR(cf_it->getIntensity(), 182238.38)
    ++cf_it;
Exemplo n.º 12
0
  /**
    @brief Loads an EDTA text file into a consensus map.

    The separator (tab, space or comma) is detected from the first line.
    Three layouts are distinguished by the number of columns of the first
    line: three columns (RT, m/z, intensity), four columns (plus charge) and
    the consensus layout, recognized by a fifth header column named "RT1",
    in which every further group of four columns describes one sub-feature.
    Columns beyond the feature columns are stored as meta values named after
    their header. A header line is mandatory for more than three columns.

    @param filename Path of the file to read
    @param consensus_map Receives the loaded data; any previous content is discarded
    @throws Exception::ParseError if the file is empty, has too few columns,
            lacks a required header, or contains values that cannot be
            converted to numbers
  */
  void EDTAFile::load(const String& filename, ConsensusMap& consensus_map)
  {
    // load input
    TextFile input(filename);

    // reset map
    consensus_map = ConsensusMap();
    consensus_map.setUniqueId();

    // the first line is inspected below; an empty file has none to dereference
    if (input.begin() == input.end())
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: the file is empty! Expected at least 3 columns!\n"));
    }

    TextFile::ConstIterator input_it = input.begin();

    // detect the column separator from the first line (tab wins over space, space over comma)
    char separator = ' ';
    if (input_it->hasSubstring("\t"))
      separator = '\t';
    else if (input_it->hasSubstring(" "))
      separator = ' ';
    else if (input_it->hasSubstring(","))
      separator = ',';

    // parsing header line
    std::vector<String> headers;
    input_it->split(separator, headers);
    int offset = 0; // number of header lines preceding the data (0 or 1)
    for (Size i = 0; i < headers.size(); ++i)
    {
      headers[i].trim();
    }
    String header_trimmed = *input.begin();
    header_trimmed.trim();

    enum
    {
      TYPE_UNDEFINED,
      TYPE_OLD_NOCHARGE,
      TYPE_OLD_CHARGE,
      TYPE_CONSENSUS
    }
    input_type = TYPE_UNDEFINED;
    Size input_features = 1;

    double rt = 0.0;
    double mz = 0.0;
    double it = 0.0;
    Int ch = 0;

    if (headers.size() <= 2)
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: not enough columns! Expected at least 3 columns!\nOffending line: '") + header_trimmed + "'  (line 1)\n");
    }
    else if (headers.size() == 3)
      input_type = TYPE_OLD_NOCHARGE;
    else if (headers.size() == 4)
      input_type = TYPE_OLD_CHARGE;

    // see if we have a header
    try
    {
      // try to convert... if not: thats a header
      rt = headers[0].toDouble();
      mz = headers[1].toDouble();
      it = headers[2].toDouble();
    }
    catch (Exception::BaseException&)
    {
      offset = 1;
      ++input_it;
      LOG_INFO << "Detected a header line.\n";
    }

    if (headers.size() >= 5)
    {
      if (String(headers[4].trim()).toUpper() == "RT1")
        input_type = TYPE_CONSENSUS;
      else
        input_type = TYPE_OLD_CHARGE;
    }
    if (input_type == TYPE_CONSENSUS)
    {
      // Every consensus style line includes features with four columns.
      // The remainder is meta data
      input_features = headers.size() / 4;
    }

    if (offset == 0 && (input_type == TYPE_OLD_CHARGE || input_type == TYPE_CONSENSUS))
    {
      throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line 1: No HEADER provided. This is only allowed for three columns. You have more!\nOffending line: '") + header_trimmed + "'  (line 1)\n");
    }

    SignedSize input_size = input.end() - input.begin();

    // provisional description; entry 0 is overwritten once all lines are parsed
    ConsensusMap::FileDescription desc;
    desc.filename = filename;
    desc.size = (input_size) - offset;
    consensus_map.getFileDescriptions()[0] = desc;

    // parsing features
    consensus_map.reserve(input_size);

    for (; input_it != input.end(); ++input_it)
    {
      //do nothing for empty lines
      String line_trimmed = *input_it;
      line_trimmed.trim();
      if (line_trimmed == "")
      {
        // an empty last line is not worth a warning
        if ((input_it - input.begin()) < input_size - 1) LOG_WARN << "Notice: Empty line ignored (line " << ((input_it - input.begin()) + 1) << ").\n";
        continue;
      }

      //split line to tokens
      std::vector<String> parts;
      input_it->split(separator, parts);

      //abort if line does not contain enough fields
      if (parts.size() < 3)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                    String("Failed parsing in line ")
                                    + String((input_it - input.begin()) + 1)
                                    + ": At least three columns are needed! (got  "
                                    + String(parts.size())
                                    + ")\nOffending line: '"
                                    + line_trimmed
                                    + "'  (line "
                                    + String((input_it - input.begin()) + 1)
                                    + ")\n");
      }

      ConsensusFeature cf;
      cf.setUniqueId();

      try
      {
        // Convert values. Will return -1 if not available.
        rt = checkedToDouble_(parts, 0);
        mz = checkedToDouble_(parts, 1);
        it = checkedToDouble_(parts, 2);
        ch = checkedToInt_(parts, 3);

        cf.setRT(rt);
        cf.setMZ(mz);
        cf.setIntensity(it);
        if (input_type != TYPE_OLD_NOCHARGE)
          cf.setCharge(ch);
      }
      catch (Exception::BaseException&)
      {
        throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert the first three columns to a number!\nOffending line: '" + line_trimmed + "'  (line " + String((input_it - input.begin()) + 1) + ")\n");
      }

      // Check all features in one line
      // (group 0 is the consensus centroid parsed above, sub-features start at group 1)
      for (Size j = 1; j < input_features; ++j)
      {
        try
        {
          Feature f;
          f.setUniqueId();

          // Convert values. Will return -1 if not available.
          rt = checkedToDouble_(parts, j * 4 + 0);
          mz = checkedToDouble_(parts, j * 4 + 1);
          it = checkedToDouble_(parts, j * 4 + 2);
          ch = checkedToInt_(parts, j * 4 + 3);

          // Only accept features with at least RT and MZ set
          if (rt != -1 && mz != -1)
          {
            f.setRT(rt);
            f.setMZ(mz);
            f.setIntensity(it);
            f.setCharge(ch);

            // sub-feature j belongs to map j - 1
            cf.insert(j - 1, f);
          }
        }
        catch (Exception::BaseException&)
        {
          throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "", String("Failed parsing in line ") + String((input_it - input.begin()) + 1) + ": Could not convert one of the four sub-feature columns (starting at column " + (j * 4 + 1) + ") to a number! Is the correct separator specified?\nOffending line: '" + line_trimmed + "'  (line " + String((input_it - input.begin()) + 1) + ")\n");
        }
      }

      //parse meta data
      for (Size j = input_features * 4; j < parts.size(); ++j)
      {
        String part_trimmed = parts[j];
        part_trimmed.trim();
        if (part_trimmed != "")
        {
          //check if column name is ok
          if (headers.size() <= j || headers[j] == "")
          {
            throw Exception::ParseError(__FILE__, __LINE__, __PRETTY_FUNCTION__, "",
                                        String("Error: Missing meta data header for column ") + (j + 1) + "!\n"
                                        + String("Offending header line: '") + header_trimmed + "'  (line 1)");
          }
          //add meta value
          cf.setMetaValue(headers[j], part_trimmed);
        }
      }

      //insert feature to map
      consensus_map.push_back(cf);
    }

    // register FileDescriptions
    ConsensusMap::FileDescription fd;
    fd.filename = filename;
    fd.size = consensus_map.size();
    Size maps = std::max(input_features - 1, Size(1)); // its either a simple feature or a consensus map
    // (in this case the 'input_features' includes the centroid, which we do not count)
    for (Size i = 0; i < maps; ++i)
    {
      fd.label = String("EDTA_Map ") + String(i);
      consensus_map.getFileDescriptions()[i] = fd;
    }

  }
Exemplo n.º 13
0
// second peptide identification of the first consensus feature: two hits, "C" and "D"
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[0].getSequence(), "C")
TEST_EQUAL(map[0].getPeptideIdentifications()[1].getHits()[1].getSequence(), "D")
// second consensus feature: exactly one peptide identification with a single hit "E"
TEST_EQUAL(map[1].getPeptideIdentifications().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map[1].getPeptideIdentifications()[0].getHits()[0].getSequence(), "E")
// unassigned peptide identifications (stored on the map itself, not on a consensus feature):
// two identifications, the first with one hit ("F"), the second with two hits ("G", "H")
TEST_EQUAL(map.getUnassignedPeptideIdentifications().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits().size(), 1)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[0].getHits()[0].getSequence(), "F")
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits().size(), 2)
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[0].getSequence(), "G")
TEST_EQUAL(map.getUnassignedPeptideIdentifications()[1].getHits()[1].getSequence(), "H")

// features: the map is expected to hold six consensus features; the first one is checked in detail
TEST_EQUAL(map.size(), 6)
ConsensusFeature cons_feature = map[0];
// centroid position, intensity
TEST_REAL_SIMILAR(cons_feature.getRT(), 1273.27)
TEST_REAL_SIMILAR(cons_feature.getMZ(), 904.47)
TEST_REAL_SIMILAR(cons_feature.getIntensity(), 3.12539e+07)
// position range: min and max coincide; index 0 matches the RT value, index 1 the m/z value
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[0], 1273.27)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().minPosition()[1], 904.47)
TEST_REAL_SIMILAR(cons_feature.getPositionRange().maxPosition()[1], 904.47)
// intensity range: min and max both equal the feature intensity
TEST_REAL_SIMILAR(cons_feature.getIntensityRange().minPosition()[0], 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getIntensityRange().maxPosition()[0], 3.12539e+07)
TEST_REAL_SIMILAR(cons_feature.getQuality(), 1.1)
// meta value "peptide_id" must be present with the expected content
TEST_EQUAL(cons_feature.getMetaValue("peptide_id") == DataValue("RefSeq:NC_1234"), true)
// first feature handle of the consensus feature carries the same intensity as the centroid
ConsensusFeature::HandleSetType::const_iterator it = cons_feature.begin();
TEST_REAL_SIMILAR(it->getIntensity(), 3.12539e+07)
Exemplo n.º 14
0
  /**
    @brief Writes a plain-text report about a featureXML or consensusXML file to @p os.

    The input file is taken from the "in" option; its type is taken from "in_type" or,
    if that is unknown, determined via FileHandler. Depending on the flags "m", "p" and "s",
    meta information, data processing information and content statistics are printed.
    Independently of the flags, per-RT-slice statistics (featureXML, "n" slices) or summary
    statistics over all consensus features (consensusXML) are appended.

    @param os Stream that receives the report.
    @return PARSE_ERROR if the input file type cannot be determined, EXECUTION_OK otherwise.
  */
  ExitCodes outputTo(ostream& os)
  {
    //-------------------------------------------------------------
    // Parameter handling
    //-------------------------------------------------------------

    // File names
    String in = getStringOption_("in");

    // File type
    FileHandler fh;
    FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

    if (in_type == FileTypes::UNKNOWN)
    {
      in_type = fh.getType(in);
      writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
    }

    if (in_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine input file type!");
      return PARSE_ERROR;
    }

    FeatureMap feat;
    ConsensusMap cons;

    if (in_type == FileTypes::FEATUREXML) //features
    {
      FeatureXMLFile().load(in, feat);
      feat.updateRanges();
    }
    else if (in_type == FileTypes::CONSENSUSXML)     //consensus features
    {
      ConsensusXMLFile().load(in, cons);
      cons.updateRanges();
    }

    //-------------------------------------------------------------
    // meta information
    //-------------------------------------------------------------
    if (getFlag_("m"))
    {
      os << endl
         << "-- General information --" << endl
         << endl
         << "file name: " << in << endl
         << "file type: " <<  FileTypes::typeToName(in_type) << endl;

      //basic info
      os << endl
         << "-- Meta information --" << endl
         << endl;

      if (in_type == FileTypes::FEATUREXML) //features
      {
        os << "Document id       : " << feat.getIdentifier() << endl << endl;
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        os << "Document id       : " << cons.getIdentifier() << endl << endl;
      }
    }

    //-------------------------------------------------------------
    // data processing
    //-------------------------------------------------------------
    if (getFlag_("p"))
    {
      //basic info
      os << endl
         << "-- Data processing information --" << endl
         << endl;

      //get data processing info
      vector<DataProcessing> dp;
      if (in_type == FileTypes::FEATUREXML) //features
      {
        dp = feat.getDataProcessing();
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        dp = cons.getDataProcessing();
      }
      int i = 0;
      for (vector<DataProcessing>::iterator it = dp.begin(); it != dp.end(); ++it)
      {
        os << "Data processing " << i << endl;
        os << "\tcompletion_time:   " << (*it).getCompletionTime().getDate() << 'T' << (*it).getCompletionTime().getTime() << endl;
        os << "\tsoftware name:     " << (*it).getSoftware().getName() << " version " << (*it).getSoftware().getVersion() << endl;
        for (set<DataProcessing::ProcessingAction>::const_iterator paIt = (*it).getProcessingActions().begin(); paIt != (*it).getProcessingActions().end(); ++paIt)
        {
          os << "\t\tprocessing action: " << DataProcessing::NamesOfProcessingAction[*paIt] << endl;
        }
        // advance the running index per entry (it used to be incremented only once
        // after the loop, so every entry was reported as "Data processing 0")
        ++i;
      }
    }

    //-------------------------------------------------------------
    // statistics
    //-------------------------------------------------------------
    if (getFlag_("s"))
    {
      //-------------------------------------------------------------
      // Content statistics
      //-------------------------------------------------------------
      if (in_type == FileTypes::FEATUREXML) //features
      {
        os << "Number of features: " << feat.size() << endl
           << endl
           << "Ranges:" << endl
           << "  retention time:  " << String::number(feat.getMin()[Peak2D::RT], 2) << " : " << String::number(feat.getMax()[Peak2D::RT], 2) << endl
           << "  mass-to-charge:  " << String::number(feat.getMin()[Peak2D::MZ], 2) << " : " << String::number(feat.getMax()[Peak2D::MZ], 2) << endl
           << "  intensity:       " << String::number(feat.getMinInt(), 2) << " : " << String::number(feat.getMaxInt(), 2) << endl
           << endl;

        // Charge distribution
        Map<UInt, UInt> charges;
        for (Size i = 0; i < feat.size(); ++i)
        {
          charges[feat[i].getCharge()]++;
        }

        os << "Charge distribution" << endl;
        for (Map<UInt, UInt>::const_iterator it = charges.begin();
             it != charges.end(); ++it)
        {
          os << "charge " << it->first << ": " << it->second << endl;
        }
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        // histogram: number of sub-elements -> number of consensus features of that size
        map<Size, UInt> num_consfeat_of_size;
        for (ConsensusMap::const_iterator cmit = cons.begin();
             cmit != cons.end(); ++cmit)
        {
          ++num_consfeat_of_size[cmit->size()];
        }

        os << endl << "Number of consensus features:" << endl;
        // reverse iteration: largest consensus features are listed first
        for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
        {
          os << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
        }
        os << "  total:      " << setw(6) << cons.size() << endl << endl;

        os << "Ranges:" << endl
           << "  retention time:  " << String::number(cons.getMin()[Peak2D::RT], 2) << " : " << String::number(cons.getMax()[Peak2D::RT], 2) << endl
           << "  mass-to-charge:  " << String::number(cons.getMin()[Peak2D::MZ], 2) << " : " << String::number(cons.getMax()[Peak2D::MZ], 2) << endl
           << "  intensity:       " << String::number(cons.getMinInt(), 2) << " : " << String::number(cons.getMaxInt(), 2) << endl;

        // file descriptions
        const ConsensusMap::FileDescriptions& descs = cons.getFileDescriptions();
        if (!descs.empty())
        {
          os << endl <<
          "File descriptions:" << endl;
          for (ConsensusMap::FileDescriptions::const_iterator it = descs.begin(); it != descs.end(); ++it)
          {
            os << " - " << it->second.filename << endl
               << "   identifier: " << it->first << endl
               << "   label     : " << it->second.label << endl
               << "   size      : " << it->second.size << endl;
          }
        }
      }

      os << endl
         << "-- Summary Statistics --" << endl
         << endl;

    }

    if (in_type == FileTypes::FEATUREXML) //features
    {
      feat.sortByRT();

      vector<double> slice_stats;
      Size n = getIntOption_("n");

      Size begin = 0;
      Size end = 0;
      os << "#slice\tRT_begin\tRT_end\tnumber_of_features\ttic\t"
         << "int_mean\tint_stddev\tint_min\tint_max\tint_median\tint_lowerq\tint_upperq\t"
         << "mz_mean\tmz_stddev\tmz_min\tmz_max\tmz_median\tmz_lowerq\tmz_upperq\t"
         << "width_mean\twidth_stddev\twidth_min\twidth_max\twidth_median\twidth_lowerq\twidth_upperq\t"
         << "qual_mean\tqual_stddev\tqual_min\tqual_max\tqual_median\tqual_lowerq\tqual_upperq\t"
         << "rt_qual_mean\trt_qual_stddev\trt_qual_min\trt_qual_max\trt_qual_median\trt_qual_lowerq\trt_qual_upperq\t"
         << "mz_qual_mean\tmz_qual_stddev\tmz_qual_min\tmz_qual_max\tmz_qual_median\tmz_qual_lowerq\tmz_qual_upperq"
         << endl;

      // Largest RT (the map is sorted by RT). back() must not be called on an empty map,
      // so an empty input is treated as an RT span of 0.
      const double rt_max = (feat.size() == 0) ? 0.0 : feat.back().getRT();

      double rt_begin = 0.0;
      for (Size slice = 0; slice < n; ++slice)
      {
        // Determine slice boundaries: n equally wide RT slices covering [0, rt_max].
        double rt_end = rt_max / (double)n * (slice + 1);
        for (end = begin; end < feat.size() && feat[end].getRT() < rt_end; ++end) {}

        // Compute statistics on all features in this slice.
        slice_stats = sliceStatistics(feat, begin, end);

        // Write the beginning and end of the slices to the output as well as the slice index.
        os << slice << "\t" << rt_begin << "\t" << rt_end << "\t" << end - begin << "\t";

        // Write the statistics as a line of an csv file
        copy(slice_stats.begin(), slice_stats.end(), ostream_iterator<double>(os, "\t"));
        os << endl;

        begin = end;
        rt_begin = rt_end;
      }
    }
    else if (in_type == FileTypes::CONSENSUSXML)     //consensus features
    {
      Size size = cons.size();

      // Per-consensus-feature values. All vectors start out empty and are filled via
      // push_back below; constructing them with 'size' elements (as was done before for
      // qualities/widths) would prepend 'size' zeros and distort the statistics.
      vector<double> intensities;
      intensities.reserve(size);
      vector<double> qualities;
      qualities.reserve(size);
      vector<double> widths;
      widths.reserve(size);

      // *_delta_by_elems: signed deviation of each element from its consensus centroid
      // *_aad_by_elems:   absolute deviation of each element
      // *_aad_by_cfs:     average absolute deviation per consensus feature
      vector<double> rt_delta_by_elems;
      vector<double> rt_aad_by_elems;
      vector<double> rt_aad_by_cfs;
      rt_aad_by_cfs.reserve(size);

      vector<double> mz_delta_by_elems;
      vector<double> mz_aad_by_elems;
      vector<double> mz_aad_by_cfs;
      mz_aad_by_cfs.reserve(size);

      vector<double> it_delta_by_elems;
      vector<double> it_aad_by_elems;
      vector<double> it_aad_by_cfs;
      it_aad_by_cfs.reserve(size);

      for (ConsensusMap::const_iterator cm_iter = cons.begin();
           cm_iter != cons.end(); ++cm_iter)
      {
        double rt_aad = 0;
        double mz_aad = 0;
        double it_aad = 0;
        intensities.push_back(cm_iter->getIntensity());
        qualities.push_back(cm_iter->getQuality());
        widths.push_back(cm_iter->getWidth());
        for (ConsensusFeature::HandleSetType::const_iterator hs_iter = cm_iter->begin();
             hs_iter != cm_iter->end(); ++hs_iter)
        {
          double rt_diff = hs_iter->getRT() - cm_iter->getRT();
          rt_delta_by_elems.push_back(rt_diff);
          if (rt_diff < 0)
          {
            rt_diff = -rt_diff;
          }
          rt_aad_by_elems.push_back(rt_diff);
          rt_aad += rt_diff;
          double mz_diff = hs_iter->getMZ() - cm_iter->getMZ();
          mz_delta_by_elems.push_back(mz_diff);
          if (mz_diff < 0)
          {
            mz_diff = -mz_diff;
          }
          mz_aad_by_elems.push_back(mz_diff);
          mz_aad += mz_diff;
          // intensity is compared as a ratio; a centroid intensity of 0 is replaced by 1 to avoid division by zero
          double it_ratio = hs_iter->getIntensity() / (cm_iter->getIntensity() ? cm_iter->getIntensity() : 1.);
          it_delta_by_elems.push_back(it_ratio);
          // relative error is max(ratio, 1/ratio), i.e. always >= 1
          if (it_ratio < 1.)
          {
            it_ratio = 1. / it_ratio;
          }
          it_aad_by_elems.push_back(it_ratio);
          it_aad += it_ratio;
        }
        if (!cm_iter->empty())
        {
          rt_aad /= cm_iter->size();
          mz_aad /= cm_iter->size();
          it_aad /= cm_iter->size();
        } // otherwise rt_aad etc. are 0 anyway
        rt_aad_by_cfs.push_back(rt_aad);
        mz_aad_by_cfs.push_back(mz_aad);
        it_aad_by_cfs.push_back(it_aad);
      }

      OpenMS::SomeStatistics some_statistics;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensities of consensus features:" << endl << some_statistics(intensities) << endl;

      os.precision(writtenDigits(ConsensusFeature::QualityType()));
      os << "Qualities of consensus features:" << endl << some_statistics(qualities) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Retention time differences ( element-center, weight 1 per element):" << endl << some_statistics(rt_delta_by_elems) << endl;
      os << "Absolute retention time differences ( |element-center|, weight 1 per element):" << endl << some_statistics(rt_aad_by_elems) << endl;
      os << "Average absolute differences of retention time within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(rt_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Mass-to-charge differences ( element-center, weight 1 per element):" << endl << some_statistics(mz_delta_by_elems) << endl;
      os << "Absolute differences of mass-to-charge ( |element-center|, weight 1 per element):" << endl << some_statistics(mz_aad_by_elems) << endl;
      os << "Average absolute differences of mass-to-charge within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(mz_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensity ratios ( element/center, weight 1 per element):" << endl << some_statistics(it_delta_by_elems) << endl;
      os << "Relative intensity error ( max{(element/center),(center/element)}, weight 1 per element):" << endl << some_statistics(it_aad_by_elems) << endl;
      os << "Average relative intensity error within consensus features ( max{(element/center),(center/element)}, weight 1 per consensus features):" << endl << some_statistics(it_aad_by_cfs) << endl;
    }

    return EXECUTION_OK;
  }
Exemplo n.º 15
0
  /**
    @brief Extracts ion chromatogram apex information for a list of target positions from one or more mzML files.

    Target positions (RT, m/z) are read from the EDTA file given by "pos". For every input
    file in "in" and every target, the most intense peak inside the RT/mz tolerance window is
    searched; its observed RT, RT offset, observed m/z, m/z offset (ppm) and intensity are
    appended as five columns to the target's output row. If "auto_rt:enabled" is set, targets
    with negative RT are expanded (using the first input file only) to one target per RT peak
    picked from the smoothed TIC. The resulting table (three header rows plus one row per
    target) is written to "out".

    @return ILLEGAL_PARAMETERS for inconsistent options, INCOMPATIBLE_INPUT_DATA if an input
            file contains no spectra, EXECUTION_OK otherwise.
  */
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    StringList in = getStringList_("in");
    String edta = getStringOption_("pos");
    String out = getStringOption_("out");
    String out_sep = getStringOption_("out_separator");
    String out_TIC_debug = getStringOption_("auto_rt:out_debug_TIC");

    StringList in_header = getStringList_("in_header");


    // number of out_debug_TIC files and input files must be identical
    /*if (out_TIC_debug.size() > 0 && in.size() != out_TIC_debug.size())
    {
        LOG_FATAL_ERROR << "Error: number of input file 'in' and auto_rt:out_debug_TIC files must be identical!" << std::endl;
        return ILLEGAL_PARAMETERS;
    }*/

    // number of header files and input files must be identical
    if (in_header.size() > 0 && in.size() != in_header.size())
    {
      LOG_FATAL_ERROR << "Error: number of input file 'in' and 'in_header' files must be identical!" << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    if (!getFlag_("auto_rt:enabled") && !out_TIC_debug.empty())
    {
      LOG_FATAL_ERROR << "Error: TIC output file requested, but auto_rt is not enabled! Either do not request the file or switch on 'auto_rt:enabled'." << std::endl;
      return ILLEGAL_PARAMETERS;
    }

    double rttol = getDoubleOption_("rt_tol");
    double mztol = getDoubleOption_("mz_tol");
    Size rt_collect = getIntOption_("rt_collect");

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    MzMLFile mzml_file;
    mzml_file.setLogType(log_type_);
    MSExperiment<Peak1D> exp;

    // target positions
    EDTAFile ed;
    ConsensusMap cm;
    ed.load(edta, cm);

    StringList tf_single_header0, tf_single_header1, tf_single_header2; // header content, for each column

    std::vector<String> vec_single; // one line for each compound, multiple columns per experiment
    vec_single.resize(cm.size());
    for (Size fi = 0; fi < in.size(); ++fi)
    {
      // load raw data
      mzml_file.load(in[fi], exp);
      exp.sortSpectra(true);

      if (exp.empty())
      {
        LOG_WARN << "The given file does not contain any conventional peak data, but might"
                    " contain chromatograms. This tool currently cannot handle them, sorry." << std::endl;
        return INCOMPATIBLE_INPUT_DATA;
      }

      // try to detect RT peaks (only for the first input file -- all others should align!)
      // cm.size() might change in here...
      if (getFlag_("auto_rt:enabled") && fi == 0)
      {
        ConsensusMap cm_local = cm; // we might have different RT peaks for each map if 'auto_rt' is enabled
        cm.clear(false); // reset global list (about to be filled)

        // compute TIC
        MSChromatogram<> tic = exp.getTIC();
        MSSpectrum<> tics, tic_gf, tics_pp, tics_sn;
        for (Size ic = 0; ic < tic.size(); ++ic)
        { // rewrite Chromatogram to MSSpectrum (GaussFilter requires it); RT is stored in the m/z slot
          Peak1D peak;
          peak.setMZ(tic[ic].getRT());
          peak.setIntensity(tic[ic].getIntensity());
          tics.push_back(peak);
        }
        // smooth (no PP_CWT here due to efficiency reasons -- large FWHM take longer!)
        double fwhm = getDoubleOption_("auto_rt:FHWM");
        GaussFilter gf;
        Param p = gf.getParameters();
        p.setValue("gaussian_width", fwhm * 2); // wider than FWHM, just to be sure we have a fully smoothed peak. Merging two peaks is unlikely
        p.setValue("use_ppm_tolerance", "false");
        gf.setParameters(p);
        tic_gf = tics;
        gf.filter(tic_gf);
        // pick peaks
        PeakPickerHiRes pp;
        p = pp.getParameters();
        p.setValue("signal_to_noise", getDoubleOption_("auto_rt:SNThreshold"));
        pp.setParameters(p);
        pp.pick(tic_gf, tics_pp);

        if (tics_pp.size())
        {
          LOG_INFO << "Found " << tics_pp.size() << " auto-rt peaks at: ";
          for (Size ipp = 0; ipp != tics_pp.size(); ++ipp)
          {
            LOG_INFO << " " << tics_pp[ipp].getMZ();
          }
        }
        else
        {
          LOG_INFO << "Found no auto-rt peaks. Change threshold parameters!";
        }
        LOG_INFO << std::endl;

        if (!out_TIC_debug.empty()) // if debug file was given
        { // store intermediate steps for debug
          MSExperiment<> out_debug;
          out_debug.addChromatogram(toChromatogram(tics));
          out_debug.addChromatogram(toChromatogram(tic_gf));

          // signal-to-noise trace of the raw TIC
          SignalToNoiseEstimatorMedian<MSSpectrum<> > snt;
          snt.init(tics);
          for (Size is = 0; is < tics.size(); ++is)
          {
            Peak1D peak;
            peak.setMZ(tic[is].getMZ());
            peak.setIntensity(snt.getSignalToNoise(tics[is]));
            tics_sn.push_back(peak);
          }
          out_debug.addChromatogram(toChromatogram(tics_sn));

          out_debug.addChromatogram(toChromatogram(tics_pp));
          // get rid of "native-id" missing warning
          for (Size id = 0; id < out_debug.size(); ++id) out_debug[id].setNativeID(String("spectrum=") + id);

          mzml_file.store(out_TIC_debug, out_debug);
          LOG_DEBUG << "Storing debug AUTO-RT: " << out_TIC_debug << std::endl;
        }

        // add target EICs: for each m/z with no/negative RT, add all combinations of that m/z with auto-RTs
        // duplicate m/z entries will be ignored!
        // all other lines with positive RT values are copied unaffected
        //do not allow doubles
        std::set<double> mz_doubles;
        for (ConsensusMap::Iterator cit = cm_local.begin(); cit != cm_local.end(); ++cit)
        {
          if (cit->getRT() < 0)
          {
            if (mz_doubles.find(cit->getMZ()) == mz_doubles.end())
            {
              mz_doubles.insert(cit->getMZ());
            }
            else
            {
              LOG_INFO << "Found duplicate m/z entry (" << cit->getMZ() << ") for auto-rt. Skipping ..." << std::endl;
              continue;
            }

            // one target per picked TIC peak (the picked peak's "m/z" holds the RT, see above)
            for (MSSpectrum<>::ConstIterator itp = tics_pp.begin(); itp != tics_pp.end(); ++itp)
            {
              ConsensusFeature f = *cit;
              f.setRT(itp->getMZ());
              cm.push_back(f);
            }

          }
          else
          { // default feature with no auto-rt
            LOG_INFO << "copying feature with RT " << cit->getRT() << std::endl;
            cm.push_back(*cit);
          }
        }

        // resize, since we have more positions now
        vec_single.resize(cm.size());
      }


      // search for each EIC and add up
      Int not_found(0);

      String description;
      if (fi < in_header.size())
      {
        HeaderInfo info(in_header[fi]);
        description = info.header_description;
      }

      if (fi == 0)
      { // two additional columns for first file (theoretical RT and m/z)
        tf_single_header0 << "" << "";
        tf_single_header1 << "" << "";
        tf_single_header2 << "RT" << "mz";
      }

      // 5 entries for each input file
      tf_single_header0 << File::basename(in[fi]) << "" << "" << "" << "";
      tf_single_header1 << description << "" << "" << "" << "";
      tf_single_header2 << "RTobs" << "dRT" << "mzobs" << "dppm" << "intensity";

      for (Size i = 0; i < cm.size(); ++i)
      {
        //std::cerr << "Rt" << cm[i].getRT() << "  mz: " << cm[i].getMZ() << " R " <<  cm[i].getMetaValue("rank") << "\n";

        double mz_da = mztol * cm[i].getMZ() / 1e6; // mz tolerance in Dalton
        MSExperiment<>::ConstAreaIterator it = exp.areaBeginConst(cm[i].getRT() - rttol / 2,
                                                                  cm[i].getRT() + rttol / 2,
                                                                  cm[i].getMZ() - mz_da,
                                                                  cm[i].getMZ() + mz_da);
        // most intense peak in the window; defaults to the theoretical position with intensity 0
        Peak2D max_peak;
        max_peak.setIntensity(0);
        max_peak.setRT(cm[i].getRT());
        max_peak.setMZ(cm[i].getMZ());
        for (; it != exp.areaEndConst(); ++it)
        {
          if (max_peak.getIntensity() < it->getIntensity())
          {
            max_peak.setIntensity(it->getIntensity());
            max_peak.setRT(it.getRT());
            max_peak.setMZ(it->getMZ());
          }
        }
        double ppm = 0; // observed m/z offset

        if (max_peak.getIntensity() == 0)
        {
          ++not_found;
        }
        else
        {
          // average the m/z of all peaks found in up to 'rt_collect' spectra before and after the apex
          std::vector<double> mz;
          MSExperiment<>::Iterator itm = exp.RTBegin(max_peak.getRT());
          // clamp the number of neighbouring spectra to what is available on either side
          SignedSize low = std::min<SignedSize>(std::distance(exp.begin(), itm), rt_collect);
          SignedSize high = std::min<SignedSize>(std::distance(itm, exp.end()) - 1, rt_collect);
          MSExperiment<>::AreaIterator itt = exp.areaBegin((itm - low)->getRT() - 0.01, (itm + high)->getRT() + 0.01, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da);
          for (; itt != exp.areaEnd(); ++itt)
          {
            mz.push_back(itt->getMZ());
            //std::cerr << "ppm: " << itt.getRT() << " " <<  itt->getMZ() << " " << itt->getIntensity() << std::endl;
          }

          // more peaks than spectra in the RT window means more than one peak per spectrum fell into the m/z window
          if ((SignedSize)mz.size() > (low + high + 1)) LOG_WARN << "Compound " << i << " has overlapping peaks [" << mz.size() << "/" << low + high + 1 << "]" << std::endl;

          if (!mz.empty())
          {
            double avg_mz = std::accumulate(mz.begin(), mz.end(), 0.0) / double(mz.size());
            //std::cerr << "avg: " << avg_mz << "\n";
            ppm = (avg_mz - cm[i].getMZ()) / cm[i].getMZ() * 1e6;
          }

        }

        // appending the second column set requires separator
        String append_sep = (fi == 0 ? "" : out_sep);

        vec_single[i] += append_sep; // new line
        if (fi == 0)
        {
          vec_single[i] += String(cm[i].getRT()) + out_sep +
                           String(cm[i].getMZ()) + out_sep;
        }
        vec_single[i] += String(max_peak.getRT()) + out_sep +
                         String(max_peak.getRT() - cm[i].getRT()) + out_sep +
                         String(max_peak.getMZ()) + out_sep +
                         String(ppm)  + out_sep +
                         String(max_peak.getIntensity());
      }

      if (not_found) LOG_INFO << "Missing peaks for " << not_found << " compounds in file '" << in[fi] << "'.\n";
    }

    //-------------------------------------------------------------
    // create header
    //-------------------------------------------------------------
    // inserted in reverse order so that header0 ends up as the first line
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header2, out_sep));
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header1, out_sep));
    vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header0, out_sep));

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    TextFile tf;
    for (std::vector<String>::iterator v_it = vec_single.begin(); v_it != vec_single.end(); ++v_it)
    {
      tf.addLine(*v_it);
    }
    tf.store(out);

    return EXECUTION_OK;
  }
	FeatureHandle handle6(1, static_cast<BaseFeature>(maps[1][0]));
	out.resize(1);
	out[0].insert(handle5);
	out[0].insert(handle6);

	// need an instance of FeatureGroupingAlgorithm:
	String algo_name = Factory<FeatureGroupingAlgorithm>::registeredProducts()[0];
	FeatureGroupingAlgorithm* algo = Factory<FeatureGroupingAlgorithm>::create(
		algo_name);

	algo->transferSubelements(maps, out);

	TEST_EQUAL(out.getFileDescriptions().size(), 4);
	TEST_EQUAL(out.getFileDescriptions()[0].filename, "file1");
	TEST_EQUAL(out.getFileDescriptions()[3].filename, "file4");
	TEST_EQUAL(out.size(), 1);
	TEST_EQUAL(out[0].size(), 4);

	ConsensusFeature::HandleSetType group = out[0].getFeatures();
	ConsensusFeature::HandleSetType::const_iterator it = group.begin();
	handle3.setMapIndex(2);
	handle4.setMapIndex(3);
	TEST_EQUAL(*it++ == handle1, true);
	TEST_EQUAL(*it++ == handle2, true);
	TEST_EQUAL(*it++ == handle3, true);
	TEST_EQUAL(*it++ == handle4, true);
}
END_SECTION


Exemplo n.º 17
0
  void ProteinInference::infer_(ConsensusMap & consensus_map,
                                const size_t protein_idenfication_index,
                                const UInt reference_map)
  {

    ProteinIdentification & protein_ident = consensus_map.getProteinIdentifications()[protein_idenfication_index];
    for (size_t i = 0; i < protein_ident.getHits().size(); ++i)
    {
      // Protein Accession
      String accession = protein_ident.getHits()[i].getAccession();

      // consensus feature -> peptide hit
      Map<size_t, PeptideHit> consensus_to_peptide;

      // search for it in consensus elements:
      for (size_t i_cm = 0; i_cm < consensus_map.size(); ++i_cm)
      {
        std::vector<PeptideHit> peptide_hits;
        for (std::vector<PeptideIdentification>::iterator it_pepid = consensus_map[i_cm].getPeptideIdentifications().begin();
             it_pepid != consensus_map[i_cm].getPeptideIdentifications().end();
             ++it_pepid)
        {
          // are Protein- and PeptideIdentification from the same search engine run?
          if (it_pepid->getIdentifier() != protein_ident.getIdentifier())
            continue;

          std::vector<PeptideHit> peptide_hits_local;

          it_pepid->getReferencingHits(accession, peptide_hits_local);

          if (peptide_hits_local.empty())
            continue;

          if (sortByUnique_(peptide_hits_local, it_pepid->isHigherScoreBetter())) // we found a unique peptide
          {
            peptide_hits.push_back(peptide_hits_local[0]);
          }

        }

        // if several PeptideIdentifications (==Spectra) were assigned to current ConsensusElement
        // --> take the best (as above), e.g. in SILAC this could happen
        // TODO: better idea?
        if (!peptide_hits.empty())
        {
          if (sortByUnique_(peptide_hits, consensus_map[i_cm].getPeptideIdentifications()[0].isHigherScoreBetter())) //found a unique peptide for current ConsensusElement
          {
            consensus_to_peptide[i_cm] = peptide_hits[0];
#ifdef DEBUG_INFERENCE
            std::cout << "assign peptide " <<  peptide_hits[0].getSequence() << " to Protein " << accession << std::endl;
#endif
          }
        }

      }       // ! ConsensusMap loop

      // no peptides found that match current Protein
      if (consensus_to_peptide.empty())
        continue;

      // Use all matching ConsensusElements to derive a quantitation for current protein
      // build up ratios for every map vs reference
      double coverage = 0;
      Map<Size, std::vector<IntensityType> > ratios;

      // number of unique peptides pointing to current protein
      UInt coverage_count = (UInt)consensus_to_peptide.size();

      for (Map<size_t, PeptideHit>::iterator it_pephits = consensus_to_peptide.begin();
           it_pephits != consensus_to_peptide.end();
           ++it_pephits)
      {
        coverage += it_pephits->second.getSequence().size();
        const ConsensusFeature::HandleSetType & handles = consensus_map[it_pephits->first].getFeatures();
        //search if reference is present
        ConsensusFeature::HandleSetType::const_iterator it_ref = handles.end();
        for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin();
             it != handles.end();
             ++it)
        {
          if (it->getMapIndex() == reference_map)
          {
            it_ref = it;
            break;
          }
        }

        // did not find a reference
        // TODO assume intensity==0 instead??
        if (it_ref == handles.end())
          continue;

        for (ConsensusFeature::HandleSetType::const_iterator it = handles.begin();
             it != handles.end();
             ++it)
        {
          ratios[it->getMapIndex()].push_back(it->getIntensity() / it_ref->getIntensity());
        }

      }

      // sort ratios map-wise and take median
      for (ConsensusMap::FileDescriptions::const_iterator it_file = consensus_map.getFileDescriptions().begin();
           it_file != consensus_map.getFileDescriptions().end();
           ++it_file)
      {
        if (ratios.has(it_file->first))
        {
          //sort intensity ratios for map #it_file->first
          std::sort(ratios[it_file->first].begin(), ratios[it_file->first].end());
          //take median
          IntensityType protein_ratio = ratios[it_file->first][ratios[it_file->first].size() / 2];

          //TODO if ratios have high variance emit a warning!

          protein_ident.getHits()[i].setMetaValue(String("ratio_") + String(it_file->first), protein_ratio);
        }

      }       // ! map loop

      // % coverage of protein by peptides
      coverage /= DoubleReal(protein_ident.getHits()[i].getSequence().size()) / 100;

      protein_ident.getHits()[i].setMetaValue("coverage", coverage);
      protein_ident.getHits()[i].setMetaValue("hits", coverage_count);

    }     // ! Protein loop



    // protein_to_peptides now contains the Protein -> Peptides mapping
    // lets estimate the

  }
Exemplo n.º 18
0
  // Evaluates how well the features in "in" reproduce the feature pairs stored
  // in the consensus map "truth".
  //
  // For both elements of every truth pair the best input feature inside the
  // RT / m/z tolerance window is looked up. A pair counts as "found" when both
  // elements have at least one candidate and as "half found" when exactly one
  // has. Summary statistics are printed to stdout; EXECUTION_OK is always returned.
  ExitCodes main_(int, const char **)
  {
    // features to be evaluated
    FeatureMap input;
    FeatureXMLFile().load(getStringOption_("in"), input);

    // reference pairs (consensusXML)
    ConsensusMap truth;
    ConsensusXMLFile().load(getStringOption_("truth"), truth);

    // matching tolerances
    double mz_tol = getDoubleOption_("mz_tol");
    double rt_tol = getDoubleOption_("rt_tol");

    // counters and per-pair statistics
    UInt n_found = 0;
    UInt n_half_found = 0;
    vector<double> detected_ratios, truth_ratios, rt_distances, mz_distances;

    for (Size idx = 0; idx < truth.size(); ++idx)
    {
      // only real pairs can be evaluated; anything else is reported and skipped
      if (truth[idx].size() != 2)
      {
        cerr << "Error: consensus feature must contain exactly two elements!" << endl;
        continue;
      }

      // the two partners of the truth pair, in handle-set order
      vector<Peak2D> elements(2);
      ConsensusFeature::HandleSetType::const_iterator handle = truth[idx].getFeatures().begin();
      elements[0] = *handle;
      ++handle;
      elements[1] = *handle;

      // the m/z tolerance is scaled by the charge of the consensus feature
      double mz_tol_charged = mz_tol / truth[idx].getCharge();

      vector<UInt> match_counts(2, 0);
      vector<Feature> best_matches(2);
      for (Size e = 0; e < 2; ++e)
      {
        const Peak2D & target = elements[e];
        double best_score = 0.0;
        for (Size i = 0; i < input.size(); ++i)
        {
          const Feature & candidate = input[i];

          // candidate must lie strictly inside the RT window ...
          const double rt_dev = fabs(candidate.getRT() - target.getRT());
          if (!(rt_dev < rt_tol))
          {
            continue;
          }
          // ... and strictly inside the (charge-scaled) m/z window
          const double mz_dev = fabs(candidate.getMZ() - target.getMZ());
          if (!(mz_dev < mz_tol_charged))
          {
            continue;
          }

          ++match_counts[e];

          // product of the relative closeness in both dimensions
          const double score = (1.0 - mz_dev / mz_tol_charged) * (1.0 - rt_dev / rt_tol);
          if (score > best_score)
          {
            best_score = score;
            best_matches[e] = candidate;
          }
        }
      }

      const bool first_found = match_counts[0] > 0;
      const bool second_found = match_counts[1] > 0;

      if (first_found && second_found)
      {
        // complete pair: record ratios and distances of the detected partners
        ++n_found;
        double detected = best_matches[0].getIntensity() / best_matches[1].getIntensity();
        detected_ratios.push_back(detected);
        double expected = elements[0].getIntensity() / elements[1].getIntensity();
        truth_ratios.push_back(expected);
        rt_distances.push_back(best_matches[1].getRT() - best_matches[0].getRT());
        mz_distances.push_back((best_matches[1].getMZ() - best_matches[0].getMZ()) * truth[idx].getCharge());
      }
      else if (first_found || second_found)
      {
        // only one partner was detected
        ++n_half_found;
      }
      // neither partner detected: nothing to record
    }

    // report
    cout << endl;
    cout << "pair detection statistics:" << endl;
    cout << "==========================" << endl;
    cout << "truth pairs: " << truth.size() << endl;
    cout << "input features: " << input.size() << endl;
    cout << endl;
    cout << "found: " << n_found << " (" << String::number(100.0 * n_found / truth.size(), 2) << "%)" << endl;
    cout << "half found : " << n_half_found << " (" << String::number(100.0 * n_half_found / truth.size(), 2) << "%)" << endl;
    cout << "not found : " << truth.size() - (n_found + n_half_found) << " (" << String::number(100.0 - 100.0 * (n_found + n_half_found) / truth.size(), 2) << "%)" << endl;
    cout << endl;
    cout << "relative pair ratios: " << fiveNumberQuotients(truth_ratios, detected_ratios, 3) << endl;
    cout << "pair distance RT : " << fiveNumbers(rt_distances, 2) << endl;
    cout << "pair distance m/z: " << fiveNumbers(mz_distances, 2) << endl;

    return EXECUTION_OK;
  }
Exemplo n.º 19
0
  // Features 2..5 of the first feature map: check the intensity and the
  // sequence of the first hit of the first peptide identification.
  TEST_EQUAL(feature_maps[0][2].getIntensity(), 200)
  TEST_EQUAL(feature_maps[0][2].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "CNHAAAAAAAAA")

  TEST_EQUAL(feature_maps[0][3].getIntensity(), 120)
  TEST_EQUAL(feature_maps[0][3].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "CNHAADDAAAAA")

  TEST_EQUAL(feature_maps[0][4].getIntensity(), 250)
  TEST_EQUAL(feature_maps[0][4].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "HHHHHHHHHHH")

  TEST_EQUAL(feature_maps[0][5].getIntensity(), 100)
  TEST_EQUAL(feature_maps[0][5].getPeptideIdentifications()[0].getHits()[0].getSequence().toString(), "LDCELR")

  // Test ConsensusMap association:
  // the labeler must report exactly one consensus feature holding two handles
  ConsensusMap cm = labeler.getConsensus();
  TEST_EQUAL(cm.size(), 1)
  // cm[0] is accessed below, so the size is re-checked with ABORT_IF first
  ABORT_IF(cm.size() != 1)
  TEST_EQUAL(cm[0].getFeatures().size(),2)

  // handles are compared by unique id: the first handle must correspond to
  // feature 1 and the second handle to feature 0 of the first feature map
  ConsensusFeature::HandleSetType::const_iterator fhIt = cm[0].getFeatures().begin();
  TEST_EQUAL(feature_maps[0][1].getUniqueId(), fhIt->getUniqueId())
  ++fhIt;
  TEST_EQUAL(feature_maps[0][0].getUniqueId(), fhIt->getUniqueId())

  // now test the incomplete variant
  // (both fixture helpers are run on feature_maps again before the second labeler is set up)
  createTestFeatureMapSimVector_(feature_maps);
  digestFeaturesMapSimVector_(feature_maps);

  // second labeler; its parameters get a labeling efficiency of 0.7
  O18Labeler incomplete_labeler;
  Param p;
  p.setValue("labeling_efficiency", 0.7);
Exemplo n.º 20
0
  // Attaches peptide identifications to the consensus features of @p map.
  //
  // @param map                       consensus map to annotate; receives @p protein_ids as well
  // @param ids                       peptide identifications to distribute over the consensus features
  // @param protein_ids               protein identifications, appended to those already stored in @p map
  // @param measure_from_subelements  if true, RT / m/z are compared against every feature handle of a
  //                                  consensus feature; otherwise against the consensus feature itself
  //
  // Identifications that match no consensus feature end up in the map's list of
  // unassigned peptide identifications. A short assignment summary is logged.
  void IDMapper::annotate(ConsensusMap & map, const std::vector<PeptideIdentification> & ids, const std::vector<ProteinIdentification> & protein_ids, bool measure_from_subelements)
  {
    // all hits need the "RT" and "MZ" meta values
    checkHits_(ids);

    // protein identifications are simply appended to the ones of the map
    std::vector<ProteinIdentification> & map_proteins = map.getProteinIdentifications();
    map_proteins.insert(map_proteins.end(), protein_ids.begin(), protein_ids.end());

    // number of consensus features each identification (by index) was added to
    std::map<Size, Size> assigned;

    // per consensus feature: indices of the identifications already attached
    // (guards against double entries when matching via sub-elements)
    std::vector<std::set<size_t> > attached_ids(map.size());

    // filled by getIDDetails_() for the identification currently processed
    DoubleList mz_values;
    DoubleReal rt_pep;
    IntList charges;

    for (Size id_index = 0; id_index < ids.size(); ++id_index)
    {
      const PeptideIdentification & current_id = ids[id_index];

      // identifications without hits cannot be mapped
      if (current_id.getHits().empty())
      {
        continue;
      }

      getIDDetails_(current_id, rt_pep, mz_values, charges);

      for (Size cm_index = 0; cm_index < map.size(); ++cm_index)
      {
        ConsensusFeature & cons_feature = map[cm_index];

        // once one m/z value matched, the whole ID (with all hits) was added
        // and the remaining m/z values are not checked any more
        bool matched = false;

        for (Size mz_index = 0; mz_index < mz_values.size() && !matched; ++mz_index)
        {
          const DoubleReal mz_pep = mz_values[mz_index];

          // charge states that are accepted for this m/z value
          IntList accepted_charges;
          if (!ignore_charge_)
          {
            if (mz_values.size() == 1)
            {
              // "mz_ref." is "precursor": a single m/z value, but every
              // peptide hit contributes a charge state that may match
              accepted_charges = charges;
            }
            else
            {
              accepted_charges.push_back(charges[mz_index]);
            }
            // "not specified" always matches
            accepted_charges.push_back(0);
          }

          if (measure_from_subelements)
          {
            // distance is measured to the individual feature handles
            const ConsensusFeature::HandleSetType & handles = cons_feature.getFeatures();
            for (ConsensusFeature::HandleSetType::const_iterator handle = handles.begin();
                 handle != handles.end();
                 ++handle)
            {
              if (!isMatch_(rt_pep - handle->getRT(), mz_pep, handle->getMZ()))
              {
                continue;
              }
              if (!ignore_charge_ && !ListUtils::contains(accepted_charges, handle->getCharge()))
              {
                continue;
              }

              matched = true;
              std::set<size_t> & already_attached = attached_ids[cm_index];
              if (already_attached.count(id_index) == 0)
              {
                cons_feature.getPeptideIdentifications().push_back(current_id);
                ++assigned[id_index];
                already_attached.insert(id_index);
              }
              // first matching handle is enough, skip the remaining ones
              break;
            }
          }
          else
          {
            // distance is measured to the consensus feature itself
            if (isMatch_(rt_pep - cons_feature.getRT(), mz_pep, cons_feature.getMZ())
               && (ignore_charge_ || ListUtils::contains(accepted_charges, cons_feature.getCharge())))
            {
              matched = true;
              cons_feature.getPeptideIdentifications().push_back(current_id);
              ++assigned[id_index];
            }
          }
        }   // m/z values
      }     // consensus features
    }       // identifications

    // classify identifications by the number of consensus features they hit;
    // the ones without any hit are stored as unassigned
    Size n_unassigned(0);
    Size n_single(0);
    Size n_multiple(0);

    for (Size id_index = 0; id_index < ids.size(); ++id_index)
    {
      const Size hits = assigned[id_index];
      if (hits == 0)
      {
        map.getUnassignedPeptideIdentifications().push_back(ids[id_index]);
        ++n_unassigned;
      }
      else if (hits == 1)
      {
        ++n_single;
      }
      else
      {
        ++n_multiple;
      }
    }

    // summary
    LOG_INFO << "Unassigned peptides: " << n_unassigned << "\n"
             << "Peptides assigned to exactly one feature: "
             << n_single << "\n"
             << "Peptides assigned to multiple features: "
             << n_multiple << std::endl;

  }
Exemplo n.º 21
0
  // Converts the file given by option "in" into the format requested for "out".
  //
  // Depending on the input type the data is loaded into a consensus map, a
  // feature map or a peak experiment, converted if the output type needs a
  // different container, and stored.
  //
  // Returns
  //  - EXECUTION_OK on success,
  //  - PARSE_ERROR if the input or output file type cannot be determined,
  //  - INTERNAL_ERROR if EDTA output is requested while both the feature and the consensus map hold data,
  //  - INCOMPATIBLE_INPUT_DATA if CSV output is requested for non-consensusXML input,
  //  - ILLEGAL_PARAMETERS for an output type that is not handled here.
  // Throws Exception::IllegalArgument if "process_lowmemory" is set for a type
  // combination other than mzML -> mzML or mzXML -> mzML.
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input file names
    String in = getStringOption_("in");
    bool write_mzML_index = getFlag_("write_mzML_index");

    //input file type
    FileHandler fh;
    FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

    // no (known) type given explicitly: let the file handler determine it
    if (in_type == FileTypes::UNKNOWN)
    {
      in_type = fh.getType(in);
      writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
    }

    if (in_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine input file type!");
      return PARSE_ERROR;
    }


    //output file names and types
    String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

    // no (known) type given explicitly: derive it from the output file name
    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }

    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    bool TIC_DTA2D = getFlag_("TIC_DTA2D");
    bool process_lowmemory = getFlag_("process_lowmemory");

    writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 1);

    String uid_postprocessing = getStringOption_("UID_postprocessing");
    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    typedef MSExperiment<Peak1D> MSExperimentType;
    MSExperimentType exp;

    typedef MSExperimentType::SpectrumType SpectrumType;

    typedef FeatureMap FeatureMapType;

    // only one of exp / fm / cm is loaded from the input; the others are
    // filled below only if the output type requires a conversion
    FeatureMapType fm;
    ConsensusMap cm;

    writeDebug_(String("Loading input file"), 1);

    if (in_type == FileTypes::CONSENSUSXML)
    {
      ConsensusXMLFile().load(in, cm);
      cm.sortByPosition();
      // any output other than featureXML / consensusXML is written from 'exp'
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::EDTA)
    {
      // EDTA input is loaded into the consensus map and then handled like consensusXML
      EDTAFile().load(in, cm);
      cm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::FEATUREXML ||
             in_type == FileTypes::TSV ||
             in_type == FileTypes::PEPLIST ||
             in_type == FileTypes::KROENIK)
    {
      // all feature-like formats are loaded into the feature map
      fh.loadFeatures(in, fm, in_type);
      fm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting features to peaks. You will lose information! Mass traces are added, if present as 'num_of_masstraces' and 'masstrace_intensity_<X>' (X>=0) meta values.");
        exp.set2DData<true>(fm);
      }
    }
    else if (process_lowmemory)
    {
      // Special switch for the low memory options:
      // We can transform the complete experiment directly without first
      // loading the complete data into memory. PlainMSDataWritingConsumer will
      // write out mzML to disk as they are read from the input.
      // Note: this branch is only reached for input types not handled above,
      // and both supported combinations return directly, i.e. the
      // "writing output" section below is skipped.
      if (in_type == FileTypes::MZML && out_type == FileTypes::MZML)
      {
        PlainMSDataWritingConsumer consumer(out);
        consumer.getOptions().setWriteIndex(write_mzML_index);
        consumer.addDataProcessing(getProcessingInfo_(DataProcessing::CONVERSION_MZML));
        MzMLFile mzmlfile; 
        mzmlfile.setLogType(log_type_);
        mzmlfile.transform(in, &consumer);
        return EXECUTION_OK;
      }
      else if (in_type == FileTypes::MZXML && out_type == FileTypes::MZML)
      {
        PlainMSDataWritingConsumer consumer(out);
        consumer.getOptions().setWriteIndex(write_mzML_index);
        consumer.addDataProcessing(getProcessingInfo_(DataProcessing::CONVERSION_MZML));
        MzXMLFile mzxmlfile; 
        mzxmlfile.setLogType(log_type_);
        mzxmlfile.transform(in, &consumer);
        return EXECUTION_OK;
      }
      else
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
          "Process_lowmemory option can only be used with mzML / mzXML input and mzML output data types.");
      }
    }
    else
    {
      // everything else is treated as peak data
      fh.loadExperiment(in, exp, in_type, log_type_);
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    writeDebug_(String("Writing output file"), 1);

    // the peak formats (mzML, mzData, mzXML, DTA2D, MGF) are all written from 'exp'
    if (out_type == FileTypes::MZML)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZML));
      MzMLFile f;
      f.setLogType(log_type_);
      f.getOptions().setWriteIndex(write_mzML_index);
      ChromatogramTools().convertSpectraToChromatograms(exp, true);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZDATA)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZDATA));
      MzDataFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZXML)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZXML));
      MzXMLFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::DTA2D)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      DTA2DFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      if (TIC_DTA2D)
      {
        // store the total ion chromatogram (TIC)
        f.storeTIC(out, exp);
      }
      else
      {
        // store entire experiment
        f.store(out, exp);
      }


    }
    else if (out_type == FileTypes::MGF)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      MascotGenericFile f;
      f.setLogType(log_type_);
      f.store(out, exp, getFlag_("MGF_compact"));
    }
    else if (out_type == FileTypes::FEATUREXML)
    {
      // bring the data into 'fm', depending on the container the input was loaded into
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        // already a feature map: only apply the requested unique id post-processing
        // (any value other than "ensure" / "reassign" leaves the ids untouched)
        if (uid_postprocessing == "ensure")
        {
          fm.applyMemberFunction(&UniqueIdInterface::ensureUniqueId);
        } else if (uid_postprocessing == "reassign")
        {
          fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
        }
      }
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
        MapConversion::convert(cm, true, fm);
      }
      else // not loaded as feature map or consensus map
      {
        // The feature specific information is only defaulted. Enough reasons to issue a warning!
        writeLog_("Warning: Converting peaks to features will lead to incomplete features!");
        fm.clear();
        fm.reserve(exp.getSize());
        typedef FeatureMapType::FeatureType FeatureType;
        // one template feature is reused: quality values are set once, while
        // RT, m/z, intensity and unique id are overwritten for every peak
        FeatureType feature;
        feature.setQuality(0, 1); // override default
        feature.setQuality(1, 1); // override default
        feature.setOverallQuality(1); // override default
        for (MSExperimentType::ConstIterator spec_iter = exp.begin();
             spec_iter != exp.end();
             ++spec_iter
             )
        {
          feature.setRT(spec_iter->getRT());
          for (SpectrumType::ConstIterator peak1_iter = spec_iter->begin();
               peak1_iter != spec_iter->end();
               ++peak1_iter
               )
          {
            feature.setMZ(peak1_iter->getMZ());
            feature.setIntensity(peak1_iter->getIntensity());
            feature.setUniqueId();
            fm.push_back(feature);
          }
        }
        fm.updateRanges();
      }

      addDataProcessing_(fm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      FeatureXMLFile().store(out, fm);
    }
    else if (out_type == FileTypes::CONSENSUSXML)
    {
      // bring the data into 'cm', depending on the container the input was loaded into
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        // unique id post-processing is applied to the feature map before conversion
        if (uid_postprocessing == "ensure")
        {
          fm.applyMemberFunction(&UniqueIdInterface::ensureUniqueId);
        } else if (uid_postprocessing == "reassign")
        {
          fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
        }
        MapConversion::convert(0, fm, cm);
      }
      // nothing to do for consensus input
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
      }
      else // experimental data
      {
        MapConversion::convert(0, exp, cm, exp.size());
      }

      addDataProcessing_(cm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      ConsensusXMLFile().store(out, cm);
    }
    else if (out_type == FileTypes::EDTA)
    {
      // EDTA is written from whichever of the two maps holds data
      if (fm.size() > 0 && cm.size() > 0)
      {
        LOG_ERROR << "Internal error: cannot decide on container (Consensus or Feature)! This is a bug. Please report it!";
        return INTERNAL_ERROR;
      }
      // NOTE(review): if both maps are empty (presumably also the case for peak
      // input, which only fills 'exp'), nothing is stored here and EXECUTION_OK
      // is still returned below -- confirm that this is intended
      if (fm.size() > 0) EDTAFile().store(out, fm);
      else if (cm.size() > 0) EDTAFile().store(out, cm);
    }
    else if (out_type == FileTypes::CSV)
    {
      // as ibspectra is currently the only csv/text based format we assume
      // that out_type == FileTypes::CSV means ibspectra, if more formats
      // are added we need a more intelligent strategy to decide which
      // conversion is requested

      // IBSpectra selected as output type
      if (in_type != FileTypes::CONSENSUSXML)
      {
        LOG_ERROR << "Incompatible input data: FileConverter can only convert consensusXML files to ibspectra format.";
        return INCOMPATIBLE_INPUT_DATA;
      }

      IBSpectraFile ibfile;
      ibfile.store(out, cm);
    }
    else
    {
      // a known file type that this tool cannot write
      writeLog_("Unknown output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    return EXECUTION_OK;
  }