Exemplo n.º 1
0
  void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map, std::vector<PeptideIdentification> & ref_ids, String trafo_file_name)
  {
    checkReferenceIds_(ref_ids);

    calibrated_feature_map = feature_map;
    // clear the ids
    for (Size f = 0; f < calibrated_feature_map.size(); ++f)
    {
      calibrated_feature_map[f].getPeptideIdentifications().clear();
    }

    // map the reference ids onto the features
    IDMapper mapper;
    Param param;
    param.setValue("rt_tolerance", (DoubleReal)param_.getValue("rt_tolerance"));
    param.setValue("mz_tolerance", param_.getValue("mz_tolerance"));
    param.setValue("mz_measure", param_.getValue("mz_tolerance_unit"));
    mapper.setParameters(param);
    std::vector<ProteinIdentification> vec;
    mapper.annotate(calibrated_feature_map, ref_ids, vec);

    // calibrate
    calibrateMapGlobally(calibrated_feature_map, calibrated_feature_map, trafo_file_name);

    // copy the old ids
    calibrated_feature_map.setUnassignedPeptideIdentifications(feature_map.getUnassignedPeptideIdentifications());
    for (Size f = 0; f < feature_map.size(); ++f)
    {
      calibrated_feature_map[f].getPeptideIdentifications().clear();
      if (!feature_map[f].getPeptideIdentifications().empty())
      {
        calibrated_feature_map[f].setPeptideIdentifications(feature_map[f].getPeptideIdentifications());
      }
    }
  }
Exemplo n.º 2
0
  void InternalCalibration::applyTransformation_(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map)
  {
    calibrated_feature_map = feature_map;
    for (Size f = 0; f < feature_map.size(); ++f)
    {
      DoubleReal mz = feature_map[f].getMZ();
      mz = trafo_.apply(mz);
      calibrated_feature_map[f].setMZ(mz);

      // apply transformation to convex hulls and subordinates
      for (Size s = 0; s < calibrated_feature_map[f].getSubordinates().size(); ++s)
      {
        // subordinates
        DoubleReal mz = calibrated_feature_map[f].getSubordinates()[s].getMZ();
        mz = trafo_.apply(mz);
        calibrated_feature_map[f].getSubordinates()[s].setMZ(mz);
      }
      for (Size s = 0; s < calibrated_feature_map[f].getConvexHulls().size(); ++s)
      {
        // convex hulls
        std::vector<DPosition<2> > point_vec = calibrated_feature_map[f].getConvexHulls()[s].getHullPoints();
        calibrated_feature_map[f].getConvexHulls()[s].clear();
        for (Size p = 0; p < point_vec.size(); ++p)
        {
          DoubleReal mz = point_vec[p][1];
          mz = trafo_.apply(mz);
          point_vec[p][1] = mz;
        }
        calibrated_feature_map[f].getConvexHulls()[s].setHullPoints(point_vec);
      }
    }
  }
Exemplo n.º 3
0
void DeepPyramid::processFeatureMap(int filterIdx, const FeatureMap &map, vector<BoundingBox> &detectedObjects) const {
    Size mapSize = map.size();
    Size filterSize = rootFilter[filterIdx]->getMapSize();
    cout << "size: "<<map.size()<<endl;
    for (int width = 0; width < mapSize.width-filterSize.width; width+=stride) {
        for (int height = 0; height < mapSize.height-filterSize.height; height+=stride) {
            FeatureMap extractedMap;
            map.extractFeatureMap(Rect(Point(width, height), filterSize), extractedMap);
            if (rootFilter[filterIdx]->predict(extractedMap) == OBJECT) {
                BoundingBox box;
                box.norm5Box = Rect(Point(width, height), filterSize);
                box.confidence = std::fabs(rootFilter[filterIdx]->predict(extractedMap, true));
                box.map = extractedMap;
                detectedObjects.push_back(box);
            }
        }
    }
}
 void applyFDRcutoff(FeatureMap & feature_map, double cutoff, String fdr_name)
 {
   FeatureMap out_feature_map = feature_map;
   out_feature_map.clear(false);
   for (Size i = 0; i < feature_map.size(); i++)
   {
     if ((double)feature_map[i].getMetaValue(fdr_name) < cutoff)
     {
       out_feature_map.push_back(feature_map[i]);
     }
   }
   feature_map = out_feature_map;
 }
Exemplo n.º 5
0
  void EDTAFile::store(const String& filename, const FeatureMap& map) const
  {
    TextFile tf;
    tf.addLine("RT\tm/z\tintensity\tcharge");

    for (Size i = 0; i < map.size(); ++i)
    {
      const Feature& f = map[i];
      tf.addLine(String(f.getRT()) + "\t" + f.getMZ() + "\t" + f.getIntensity() + "\t" + f.getCharge());
    }

    tf.store(filename);

  }
Exemplo n.º 6
0
 void InternalCalibration::checkReferenceIds_(const FeatureMap<> & feature_map)
 {
   Size num_ids = 0;
   for (Size f = 0; f < feature_map.size(); ++f)
   {
     if (!feature_map[f].getPeptideIdentifications().empty() && feature_map[f].getPeptideIdentifications()[0].getHits().size() > 1)
     {
       throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains PeptideIdentifications with more than one hit, use the IDFilter to select only the best hits before you map the ids to the feature map.");
     }
     else if (!feature_map[f].getPeptideIdentifications().empty())
       ++num_ids;
   }
   if (num_ids < 2)
   {
     throw Exception::InvalidParameter(__FILE__, __LINE__, __PRETTY_FUNCTION__, "InternalCalibration: Your feature map contains less than two PeptideIdentifications, can't perform a linear regression on your data.");
   }
 }
  vector<double> sliceStatistics(const FeatureMap& map, Size begin, Size end) const
  {
    // If we are asked to produce stats for an empty set, return an empty vector.
    if (end <= begin || end > map.size())
    {
      return vector<double>(43);
    }

    Size size = end - begin;
    vector<double> intensities(size);
    vector<double> peak_widths(size);
    vector<double> mz(size);
    vector<double> overall_qualities(size);
    vector<double> mz_qualities(size);
    vector<double> rt_qualities(size);
    double tic = 0.0;

    for (Size i = begin; i < end; ++i)
    {
      intensities[i - begin]  = map[i].getIntensity();
      mz[i - begin]                       = map[i].getMZ();
      peak_widths[i - begin]  = map[i].getWidth();
      rt_qualities[i - begin] = map[i].getQuality(Feature::RT);
      mz_qualities[i - begin] = map[i].getQuality(Feature::MZ);
      overall_qualities[i - begin] = map[i].getOverallQuality();
      tic += map[i].getIntensity();
    }

    vector<double> results;
    SomeStatistics some_statistics;
    results.reserve(43); // 6 7-number stats + tic
    results.push_back(tic);
    results << some_statistics(intensities);
    results << some_statistics(mz);
    results << some_statistics(peak_widths);
    results << some_statistics(overall_qualities);
    results << some_statistics(rt_qualities);
    results << some_statistics(mz_qualities);

    return results;
  }
Exemplo n.º 8
0
 /// Counts the number of features with meta value @p name equal to @p value
 UInt count(const FeatureMap& map, const String& name, const String& value = "")
 {
   UInt count = 0;
   for (Size i = 0; i < map.size(); ++i)
   {
     if (map[i].metaValueExists(name))
     {
       if (value == "")
       {
         ++count;
       }
       else
       {
         if (map[i].getMetaValue(name).toString() == value)
         {
           ++count;
         }
       }
     }
   }
   return count;
 }
Exemplo n.º 9
0
  void InternalCalibration::calibrateMapGlobally(const FeatureMap<> & feature_map, FeatureMap<> & calibrated_feature_map,
                                                 String trafo_file_name)
  {
    // check if the ids
    checkReferenceIds_(feature_map);
    // first collect theoretical and observed m/z values
    std::vector<DoubleReal> observed_masses;
    std::vector<DoubleReal> theoretical_masses;
    for (Size f = 0; f < feature_map.size(); ++f)
    {
      // if more than one peptide id exists for this feature we can't use it as reference
      if (feature_map[f].getPeptideIdentifications().size() > 1)
        continue;
      if (!feature_map[f].getPeptideIdentifications().empty())
      {
        Int charge = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge();
        DoubleReal theo_mass = feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence().getMonoWeight(Residue::Full, charge) / (DoubleReal)charge;
        theoretical_masses.push_back(theo_mass);
        observed_masses.push_back(feature_map[f].getMZ());
#ifdef DEBUG_CALIBRATION
        std::cout << feature_map[f].getRT() << " " << feature_map[f].getMZ() << " " << theo_mass << std::endl;
        std::cout << feature_map[f].getPeptideIdentifications()[0].getHits().size() << std::endl;
        std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getSequence() << std::endl;
        std::cout << feature_map[f].getPeptideIdentifications()[0].getHits()[0].getCharge() << std::endl;
#endif
      }
    }
    // then make the linear regression
    makeLinearRegression_(observed_masses, theoretical_masses);
    // apply transformation
    applyTransformation_(feature_map, calibrated_feature_map);
    if (trafo_file_name != "")
    {
      TransformationXMLFile().store(trafo_file_name, trafo_);
    }
  }
	FeatureMap<> output;
	
	//parameters
	Param param;
  ParamXMLFile paramFile;
	paramFile.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderAlgorithmPicked.ini"), param);
	param = param.copy("FeatureFinder:1:algorithm:",true);
	//Dummy featurefinder
	FeatureFinder ff;
	
	FFPP ffpp;
	ffpp.setParameters(param);
	ffpp.setData(input, output, ff);
	ffpp.run();
	
	TEST_EQUAL(output.size(),8);
	
	TOLERANCE_ABSOLUTE(0.001);
	TEST_REAL_SIMILAR(output[0].getOverallQuality(),0.8819);
	TEST_REAL_SIMILAR(output[1].getOverallQuality(),0.8673);
	TEST_REAL_SIMILAR(output[2].getOverallQuality(),0.9079);
	TEST_REAL_SIMILAR(output[3].getOverallQuality(),0.9271);
	TEST_REAL_SIMILAR(output[4].getOverallQuality(),0.9401);
	TEST_REAL_SIMILAR(output[5].getOverallQuality(),0.9094);
	TEST_REAL_SIMILAR(output[6].getOverallQuality(),0.9403);
	TEST_REAL_SIMILAR(output[7].getOverallQuality(),0.9243);
	
	TOLERANCE_ABSOLUTE(20.0);
	TEST_REAL_SIMILAR(output[0].getIntensity(),51260.0);
	TEST_REAL_SIMILAR(output[1].getIntensity(),44667.3);
	TEST_REAL_SIMILAR(output[2].getIntensity(),34613.3);
Exemplo n.º 11
0
  void FeatureFinder::run(const String& algorithm_name, PeakMap& input_map, FeatureMap& features, const Param& param, const FeatureMap& seeds)
  {
    // Nothing to do if there is no data
    if ((algorithm_name != "mrm" && input_map.empty()) || (algorithm_name == "mrm" && input_map.getChromatograms().empty()))
    {
      features.clear(true);
      return;
    }

    // check input
    {
      // We need updated ranges => check number of peaks
      if (algorithm_name != "mrm" && input_map.getSize() == 0)
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder needs updated ranges on input map. Aborting.");
      }

      // We need MS1 data only => check levels
      if (algorithm_name != "mrm" && (input_map.getMSLevels().size() != 1 || input_map.getMSLevels()[0] != 1))
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data. Aborting.");
      }

      //Check if the peaks are sorted according to m/z
      if (!input_map.isSorted(true))
      {
        LOG_WARN << "Input map is not sorted by RT and m/z! This is done now, before applying the algorithm!" << std::endl;
        input_map.sortSpectra(true);
        input_map.sortChromatograms(true);
      }
      for (Size s = 0; s < input_map.size(); ++s)
      {
        if (input_map[s].empty())
          continue;
        if (input_map[s][0].getMZ() < 0)
        {
          throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on spectra that contain peaks with positive m/z values. Filter the data accordingly beforehand! Aborting.");
        }
      }
    }

    // initialize
    if (algorithm_name != "mrm" && algorithm_name != "centroided")
    {
      // Resize peak flag vector
      flags_.resize(input_map.size());
      for (Size i = 0; i < input_map.size(); ++i)
      {
        flags_[i].assign(input_map[i].size(), UNUSED);
      }
    }

    // do the work
    if (algorithm_name != "none")
    {
      FeatureFinderAlgorithm* algorithm = Factory<FeatureFinderAlgorithm>::create(algorithm_name);
      algorithm->setParameters(param);
      algorithm->setData(input_map, features, *this);
      algorithm->setSeeds(seeds);
      algorithm->run();
      delete(algorithm);
    }

    if (algorithm_name != "mrm") // mrm  works on chromatograms; the next section is only for conventional data
    {
      //report RT apex spectrum index and native ID for each feature
      for (Size i = 0; i < features.size(); ++i)
      {
        //index
        Size spectrum_index = input_map.RTBegin(features[i].getRT()) - input_map.begin();
        features[i].setMetaValue("spectrum_index", spectrum_index);
        //native id
        if (spectrum_index < input_map.size())
        {
          String native_id = input_map[spectrum_index].getNativeID();
          features[i].setMetaValue("spectrum_native_id", native_id);
        }
        else
        {
          /// @todo that happens sometimes using IsotopeWaveletFeatureFinder (Rene, Marc, Andreas, Clemens)
          std::cerr << "FeatureFinderAlgorithm_impl, line=" << __LINE__ << "; FixMe this cannot be, but happens" << std::endl;
        }
      }
    }
  }
std::vector<PeptideIdentification> pep_ids;
String document_id;
IdXMLFile file;
file.load(OPENMS_GET_TEST_DATA_PATH("PrecursorIonSelection_ids.idXML"),prot_ids,pep_ids, document_id);

FeatureMap features,next_features;
FeatureXMLFile f_file;
f_file.load(OPENMS_GET_TEST_DATA_PATH("PrecursorIonSelection_features.featureXML"),features);
START_SECTION(void sortByTotalScore(FeatureMap& features))
  ptr->sortByTotalScore(features);
  TEST_REAL_SIMILAR((double)features[0].getMetaValue("msms_score"),49485.75)
END_SECTION

START_SECTION(void getNextPrecursors(FeatureMap& features,FeatureMap& next_features,UInt number))
  ptr->getNextPrecursors(features,next_features,2);
  TEST_EQUAL(next_features.size(),2)
  TEST_REAL_SIMILAR((double)next_features[0].getMetaValue("msms_score"),49485.75)
  TEST_REAL_SIMILAR((double)next_features[1].getMetaValue("msms_score"),47365)
END_SECTION

PrecursorIonSelectionPreprocessing preprocessing;
Param param;
param.setValue("precursor_mass_tolerance",0.05);
param.setValue("precursor_mass_tolerance_unit","Da");
param.setValue("missed_cleavages",1);
param.setValue("preprocessed_db_path",OPENMS_GET_TEST_DATA_PATH(""));
preprocessing.setParameters(param);
preprocessing.dbPreprocessing(OPENMS_GET_TEST_DATA_PATH("PrecursorIonSelection_db.fasta"),false);
Param param2;
param2.setValue("Preprocessing:precursor_mass_tolerance",0.05);
param2.setValue("Preprocessing:precursor_mass_tolerance_unit","Da");
  void processInput(const char * filename, FeatureMap & feature_map)
  {
    FeatureMap out_feature_map = feature_map;
    std::map<String, int> added_already;
    out_feature_map.clear(false);

    std::map<String, Feature*> feature_map_ref;
    //for (FeatureMap::iterator feature = feature_map.begin(); feature != feature_map.end(); feature++)
    for (Size i = 0; i < feature_map.size(); i++)
    {
      feature_map_ref[feature_map[i].getUniqueId()] = &feature_map[i];
    }

    std::ifstream data(filename);
    std::string   line;

    // Read header
    std::getline(data, line);
    // std::map<int, String> header_dict; // not used
    std::map<String, int> header_dict_inv;
    {
      std::stringstream          lineStream(line);
      std::string                cell;
      int cnt = 0;
      while (std::getline(lineStream,cell,'\t'))
      {
        //header_dict[cnt] = cell;
        header_dict_inv[cell] = cnt;
        cnt++;
      }
    }

    if (header_dict_inv.find("id") == header_dict_inv.end() || 
        header_dict_inv.find("m_score") == header_dict_inv.end() || 
        header_dict_inv.find("d_score") == header_dict_inv.end() )
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: The tsv file is expected to have at least the following headers: id, m_score, d_score. " );
    }

    // Read file
    std::vector<std::string> current_row;
    std::string                cell;
    int line_nr = 0;
    double m_score, d_score;
    while (std::getline(data, line))
    {
      line_nr++;
      current_row.clear();
      std::stringstream  lineStream(line);
      while (std::getline(lineStream,cell,'\t'))
      {
        current_row.push_back(cell);
      }

      String id = current_row[header_dict_inv["id"]];
      id = id.substitute("f_", ""); 
      try
      {
        m_score = ((String)current_row[header_dict_inv["m_score"]]).toDouble();
      }
      catch (char* /*str*/)
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Could not convert String" + ((String)current_row[header_dict_inv["m_score"]]) + " on line " + String(line_nr));
      }
      try
      {
        d_score = ((String)current_row[header_dict_inv["d_score"]]).toDouble();
      }
      catch (char* /*str*/)
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Could not convert String" + ((String)current_row[header_dict_inv["d_score"]]) + " on line " + String(line_nr));
      }

      if (feature_map_ref.find(id) != feature_map_ref.end() )
      {
        Feature* feature = feature_map_ref.find(id)->second;
        feature->setMetaValue("m_score", m_score);
        feature->setMetaValue("d_score", d_score);
        // we are not allowed to have duplicate unique ids
        if (added_already.find(id) != added_already.end())
        {
          throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Error: Duplicate id found in CSV file: " + id );
        }
        out_feature_map.push_back(*feature);
      }
    }
    feature_map = out_feature_map;
  }
Exemplo n.º 14
0
FileHandler a;
TEST_EQUAL(a.getOptions().hasMSLevels(), false)
END_SECTION

START_SECTION((PeakFileOptions & getOptions()))
FileHandler a;
a.getOptions().addMSLevel(1);
TEST_EQUAL(a.getOptions().hasMSLevels(), true);
END_SECTION

START_SECTION((template <class FeatureType> bool loadFeatures(const String &filename, FeatureMap<FeatureType>&map, FileTypes::Type force_type = FileTypes::UNKNOWN)))
FileHandler tmp;
FeatureMap map;
TEST_EQUAL(tmp.loadFeatures("test.bla", map), false)
TEST_EQUAL(tmp.loadFeatures(OPENMS_GET_TEST_DATA_PATH("FeatureXMLFile_2_options.featureXML"), map), true)
TEST_EQUAL(map.size(), 7);
TEST_EQUAL(tmp.loadFeatures(OPENMS_GET_TEST_DATA_PATH("FeatureXMLFile_2_options.featureXML"), map), true)
TEST_EQUAL(map.size(), 7);
END_SECTION

START_SECTION((template <class PeakType> void storeExperiment(const String &filename, const MSExperiment<PeakType>&exp, ProgressLogger::LogType log = ProgressLogger::NONE)))
FileHandler fh;
PeakMap exp;
fh.loadExperiment(OPENMS_GET_TEST_DATA_PATH("MzMLFile_1.mzML"), exp);

//test mzML
String filename;
NEW_TMP_FILE(filename)
fh.storeExperiment(filename, exp);
TEST_EQUAL(fh.getTypeByContent(filename), FileTypes::MZML)
Exemplo n.º 15
0
  ExitCodes outputTo(ostream& os)
  {
    //-------------------------------------------------------------
    // Parameter handling
    //-------------------------------------------------------------

    // File names
    String in = getStringOption_("in");

    // File type
    FileHandler fh;
    FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

    if (in_type == FileTypes::UNKNOWN)
    {
      in_type = fh.getType(in);
      writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
    }

    if (in_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine input file type!");
      return PARSE_ERROR;
    }

    MSExperiment<Peak1D> exp;
    FeatureMap feat;
    ConsensusMap cons;

    if (in_type == FileTypes::FEATUREXML) //features
    {
      FeatureXMLFile().load(in, feat);
      feat.updateRanges();
    }
    else if (in_type == FileTypes::CONSENSUSXML)     //consensus features
    {
      ConsensusXMLFile().load(in, cons);
      cons.updateRanges();
    }

    //-------------------------------------------------------------
    // meta information
    //-------------------------------------------------------------
    if (getFlag_("m"))
    {
      os << endl
         << "-- General information --" << endl
         << endl
         << "file name: " << in << endl
         << "file type: " <<  FileTypes::typeToName(in_type) << endl;

      //basic info
      os << endl
         << "-- Meta information --" << endl
         << endl;

      if (in_type == FileTypes::FEATUREXML) //features
      {
        os << "Document id       : " << feat.getIdentifier() << endl << endl;
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        os << "Document id       : " << cons.getIdentifier() << endl << endl;
      }
    }

    //-------------------------------------------------------------
    // data processing
    //-------------------------------------------------------------
    if (getFlag_("p"))
    {
      //basic info
      os << endl
         << "-- Data processing information --" << endl
         << endl;

      //get data processing info
      vector<DataProcessing> dp;
      if (in_type == FileTypes::FEATUREXML) //features
      {
        dp = feat.getDataProcessing();
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        dp = cons.getDataProcessing();
      }
      int i = 0;
      for (vector<DataProcessing>::iterator it = dp.begin(); it != dp.end(); ++it)
      {
        os << "Data processing " << i << endl;
        os << "\tcompletion_time:   " << (*it).getCompletionTime().getDate() << 'T' << (*it).getCompletionTime().getTime() << endl;
        os << "\tsoftware name:     " << (*it).getSoftware().getName() << " version " << (*it).getSoftware().getVersion() << endl;
        for (set<DataProcessing::ProcessingAction>::const_iterator paIt = (*it).getProcessingActions().begin(); paIt != (*it).getProcessingActions().end(); ++paIt)
        {
          os << "\t\tprocessing action: " << DataProcessing::NamesOfProcessingAction[*paIt] << endl;
        }
      }
      ++i;
    }

    //-------------------------------------------------------------
    // statistics
    //-------------------------------------------------------------
    if (getFlag_("s"))
    {
      //-------------------------------------------------------------
      // Content statistics
      //-------------------------------------------------------------
      Map<String, int> meta_names;
      if (in_type == FileTypes::FEATUREXML) //features
      {
        os << "Number of features: " << feat.size() << endl
           << endl
           << "Ranges:" << endl
           << "  retention time:  " << String::number(feat.getMin()[Peak2D::RT], 2) << " : " << String::number(feat.getMax()[Peak2D::RT], 2) << endl
           << "  mass-to-charge:  " << String::number(feat.getMin()[Peak2D::MZ], 2) << " : " << String::number(feat.getMax()[Peak2D::MZ], 2) << endl
           << "  intensity:       " << String::number(feat.getMinInt(), 2) << " : " << String::number(feat.getMaxInt(), 2) << endl
           << endl;

        // Charge distribution
        Map<UInt, UInt> charges;
        for (Size i = 0; i < feat.size(); ++i)
        {
          charges[feat[i].getCharge()]++;
        }

        os << "Charge distribution" << endl;
        for (Map<UInt, UInt>::const_iterator it = charges.begin();
             it != charges.end(); ++it)
        {
          os << "charge " << it->first << ": " << it->second << endl;
        }
      }
      else if (in_type == FileTypes::CONSENSUSXML)       //consensus features
      {
        map<Size, UInt> num_consfeat_of_size;
        for (ConsensusMap::const_iterator cmit = cons.begin();
             cmit != cons.end(); ++cmit)
        {
          ++num_consfeat_of_size[cmit->size()];
        }

        os << endl << "Number of consensus features:" << endl;
        for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
        {
          os << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
        }
        os << "  total:      " << setw(6) << cons.size() << endl << endl;

        os << "Ranges:" << endl
           << "  retention time:  " << String::number(cons.getMin()[Peak2D::RT], 2) << " : " << String::number(cons.getMax()[Peak2D::RT], 2) << endl
           << "  mass-to-charge:  " << String::number(cons.getMin()[Peak2D::MZ], 2) << " : " << String::number(cons.getMax()[Peak2D::MZ], 2) << endl
           << "  intensity:       " << String::number(cons.getMinInt(), 2) << " : " << String::number(cons.getMaxInt(), 2) << endl;

        // file descriptions
        const ConsensusMap::FileDescriptions& descs = cons.getFileDescriptions();
        if (!descs.empty())
        {
          os << endl <<
          "File descriptions:" << endl;
          for (ConsensusMap::FileDescriptions::const_iterator it = descs.begin(); it != descs.end(); ++it)
          {
            os << " - " << it->second.filename << endl
               << "   identifier: " << it->first << endl
               << "   label     : " << it->second.label << endl
               << "   size      : " << it->second.size << endl;
          }
        }
      }

      os << endl
         << "-- Summary Statistics --" << endl
         << endl;

    }

    if (in_type == FileTypes::FEATUREXML) //features
    {
      feat.sortByRT();

      vector<double> slice_stats;
      Size n = getIntOption_("n");

      Size begin = 0;
      Size end = 0;
      os << "#slice\tRT_begin\tRT_end\tnumber_of_features\ttic\t"
         << "int_mean\tint_stddev\tint_min\tint_max\tint_median\tint_lowerq\tint_upperq\t"
         << "mz_mean\tmz_stddev\tmz_min\tmz_max\tmz_median\tmz_lowerq\tmz_upperq\t"
         << "width_mean\twidth_stddev\twidth_min\twidth_max\twidth_median\twidth_lowerq\twidth_upperq\t"
         << "qual_mean\tqual_stddev\tqual_min\tqual_max\tqual_median\tqual_lowerq\tqual_upperq\t"
         << "rt_qual_mean\trt_qual_stddev\trt_qual_min\trt_qual_max\trt_qual_median\trt_qual_lowerq\trt_qual_upperq\t"
         << "mz_qual_mean\tmz_qual_stddev\tmz_qual_min\tmz_qual_max\tmz_qual_median\tmz_qual_lowerq\tmz_qual_upperq"
         << endl;

      double rt_begin = 0.0;
      for (Size slice = 0; slice < n; ++slice)
      {
        // Determine slice boundaries.
        double rt_end = feat.back().getRT() / (double)n * (slice + 1);
        for (end = begin; end < feat.size() && feat[end].getRT() < rt_end; ++end) {}

        // Compute statistics on all features in this slice.
        slice_stats = sliceStatistics(feat, begin, end);

        // Write the beginning and end of the slices to the output as well as the slice index.
        os << slice << "\t" << rt_begin << "\t" << rt_end << "\t" << end - begin << "\t";

        // Write the statistics as a line of an csv file
        copy(slice_stats.begin(), slice_stats.end(), ostream_iterator<double>(os, "\t"));
        os << endl;

        begin = end;
        rt_begin = rt_end;
      }
    }
    else if (in_type == FileTypes::CONSENSUSXML)     //consensus features
    {
      Size size = cons.size();

      vector<double> intensities;
      intensities.reserve(size);
      vector<double> qualities(size);
      qualities.reserve(size);
      vector<double> widths(size);
      widths.reserve(size);

      vector<double> rt_delta_by_elems;
      vector<double> rt_aad_by_elems;
      vector<double> rt_aad_by_cfs;
      rt_aad_by_cfs.reserve(size);

      vector<double> mz_delta_by_elems;
      vector<double> mz_aad_by_elems;
      vector<double> mz_aad_by_cfs;
      mz_aad_by_cfs.reserve(size);

      vector<double> it_delta_by_elems;
      vector<double> it_aad_by_elems;
      vector<double> it_aad_by_cfs;
      it_aad_by_cfs.reserve(size);

      for (ConsensusMap::const_iterator cm_iter = cons.begin();
           cm_iter != cons.end(); ++cm_iter)
      {
        double rt_aad = 0;
        double mz_aad = 0;
        double it_aad = 0;
        intensities.push_back(cm_iter->getIntensity());
        qualities.push_back(cm_iter->getQuality());
        widths.push_back(cm_iter->getWidth());
        for (ConsensusFeature::HandleSetType::const_iterator hs_iter = cm_iter->begin();
             hs_iter != cm_iter->end(); ++hs_iter)
        {
          double rt_diff = hs_iter->getRT() - cm_iter->getRT();
          rt_delta_by_elems.push_back(rt_diff);
          if (rt_diff < 0)
          {
            rt_diff = -rt_diff;
          }
          rt_aad_by_elems.push_back(rt_diff);
          rt_aad += rt_diff;
          double mz_diff = hs_iter->getMZ() - cm_iter->getMZ();
          mz_delta_by_elems.push_back(mz_diff);
          if (mz_diff < 0)
          {
            mz_diff = -mz_diff;
          }
          mz_aad_by_elems.push_back(mz_diff);
          mz_aad += mz_diff;
          double it_ratio = hs_iter->getIntensity() / (cm_iter->getIntensity() ? cm_iter->getIntensity() : 1.);
          it_delta_by_elems.push_back(it_ratio);
          if (it_ratio < 1.)
          {
            it_ratio = 1. / it_ratio;
          }
          it_aad_by_elems.push_back(it_ratio);
          it_aad += it_ratio;
        }
        if (!cm_iter->empty())
        {
          rt_aad /= cm_iter->size();
          mz_aad /= cm_iter->size();
          it_aad /= cm_iter->size();
        } // otherwise rt_aad etc. are 0 anyway
        rt_aad_by_cfs.push_back(rt_aad);
        mz_aad_by_cfs.push_back(mz_aad);
        it_aad_by_cfs.push_back(it_aad);
      }

      OpenMS::SomeStatistics some_statistics;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensities of consensus features:" << endl << some_statistics(intensities) << endl;

      os.precision(writtenDigits(ConsensusFeature::QualityType()));
      os << "Qualities of consensus features:" << endl << some_statistics(qualities) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Retention time differences ( element-center, weight 1 per element):" << endl << some_statistics(rt_delta_by_elems) << endl;
      os << "Absolute retention time differences ( |element-center|, weight 1 per element):" << endl << some_statistics(rt_aad_by_elems) << endl;
      os << "Average absolute differences of retention time within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(rt_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::CoordinateType()));
      os << "Mass-to-charge differences ( element-center, weight 1 per element):" << endl << some_statistics(mz_delta_by_elems) << endl;
      os << "Absolute differences of mass-to-charge ( |element-center|, weight 1 per element):" << endl << some_statistics(mz_aad_by_elems) << endl;
      os << "Average absolute differences of mass-to-charge within consensus features ( |element-center|, weight 1 per consensus features):" << endl << some_statistics(mz_aad_by_cfs) << endl;

      os.precision(writtenDigits(ConsensusFeature::IntensityType()));
      os << "Intensity ratios ( element/center, weight 1 per element):" << endl << some_statistics(it_delta_by_elems) << endl;
      os << "Relative intensity error ( max{(element/center),(center/element)}, weight 1 per element):" << endl << some_statistics(it_aad_by_elems) << endl;
      os << "Average relative intensity error within consensus features ( max{(element/center),(center/element)}, weight 1 per consensus features):" << endl << some_statistics(it_aad_by_cfs) << endl;
    }

    return EXECUTION_OK;
  }
    // Wrong assignment of the mono-isotopic mass for precursors are assumed:
    // - if precursor_mz matches the mz of a non-monoisotopic feature mass trace
    // - and in the case that believe_charge is true: if feature_charge matches the precursor_charge
    // In the case of wrong mono-isotopic assignment several options for correction are available:
    // keep_original will create a copy of the precursor and tandem spectrum for the new mono-isotopic mass trace and retain the original one
    // all_matching_features does this not for only the closest feature but all features in a question
    set<Size> correctToNearestFeature(const FeatureMap& features, PeakMap & exp, double rt_tolerance_s = 0.0, double mz_tolerance = 0.0, bool ppm = true, bool believe_charge = false, bool keep_original = false, bool all_matching_features = false, int max_trace = 2)
    {
      set<Size> corrected_precursors;
      // for each precursor/MS2 find all features that are in the given tolerance window (bounding box + rt tolerances)
      // if believe_charge is set, only add features that match the precursor charge
      map<Size, set<Size> > scan_idx_to_feature_idx;

      for (Size scan = 0; scan != exp.size(); ++scan)
      {
        // skip non-tandem mass spectra
        if (exp[scan].getMSLevel() != 2 || exp[scan].getPrecursors().empty()) continue;

        // extract precusor / MS2 information
        const double pc_mz = exp[scan].getPrecursors()[0].getMZ();
        const double rt = exp[scan].getRT();
        const int pc_charge = exp[scan].getPrecursors()[0].getCharge();

        for (Size f = 0; f != features.size(); ++f)
        {
          // feature  is incompatible if believe_charge is set and charges don't match
          if (believe_charge && features[f].getCharge() != pc_charge) continue;

          // check if precursor/MS2 position overlap with feature
          if (overlaps_(features[f], rt, pc_mz, rt_tolerance_s))
          {
            scan_idx_to_feature_idx[scan].insert(f);
          }
        }
      }

      // filter sets to retain compatible features:
      // if precursor_mz = feature_mz + n * feature_charge (+/- mz_tolerance) a feature is compatible, others are removed from the set
      for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
      {
        const Size scan = it->first;
        const double pc_mz = exp[scan].getPrecursors()[0].getMZ();
        const double mz_tolerance_da = ppm ? pc_mz * mz_tolerance * 1e-6  : mz_tolerance;

        // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement
        for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); )
        {
          if (!compatible_(features[*sit], pc_mz, mz_tolerance_da, max_trace))
          {
            it->second.erase(sit++);
          }
          else
          {
            ++sit;
          }
        }
      }

      // remove entries with no compatible features (empty sets).
      // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement
      for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); )
      {
        if (it->second.empty())
        {
          scan_idx_to_feature_idx.erase(it++);
        }
        else
        {
          ++it;
        }
      }

      if (debug_level_ > 0)
      {
        LOG_INFO << "Number of precursors with compatible features: " << scan_idx_to_feature_idx.size() << endl;
      }

      if (!all_matching_features)
      {
        // keep only nearest features in set
        for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
        {
          const Size scan = it->first;
          const double pc_rt = exp[scan].getRT();

          double min_distance = 1e16;
          set<Size>::iterator best_feature = it->second.begin();

          // determine nearest/best feature
          for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); ++sit)
          {
            const double current_distance = fabs(pc_rt - features[*sit].getRT());
            if (current_distance < min_distance)
            {
              min_distance = current_distance;
              best_feature = sit;
            }
          }

          // delete all except the nearest/best feature
          // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement
          for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); )
          {
            if (sit != best_feature)
            {
              it->second.erase(sit++);
            }
            else
            {
              ++sit;
            }
          }
        }
      }

      // depending on all_matching_features option, only the nearest or all features are contained in the sets
      // depending on options: move/copy corrected precursor and tandem spectrum
      if (keep_original)
      {
        // duplicate spectra for each feature in set and adapt precursor_mz and precursor_charge to feature_mz and feature_charge
        for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
        {
          const Size scan = it->first;
          MSSpectrum<> spectrum = exp[scan];
          corrected_precursors.insert(scan);
          for (set<Size>::iterator f_it = it->second.begin(); f_it != it->second.end(); ++f_it)
          {
            spectrum.getPrecursors()[0].setMZ(features[*f_it].getMZ());
            spectrum.getPrecursors()[0].setCharge(features[*f_it].getCharge());
            exp.addSpectrum(spectrum);
          }
        }
      }
      else
      {
        // set precursor_mz and _charge to the feature_mz and _charge
        for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it)
        {
          const Size scan = it->first;
          exp[scan].getPrecursors()[0].setMZ(features[*it->second.begin()].getMZ());
          exp[scan].getPrecursors()[0].setCharge(features[*it->second.begin()].getCharge());
          corrected_precursors.insert(scan);
        }
      }
      return corrected_precursors;
    }
Exemplo n.º 17
0
  void AbsoluteQuantitation::quantifyComponents(FeatureMap& unknowns)
  {
    //Potential Optimizations: create a map for each unknown FeatureMap
    // to reduce multiple loops

    // initialize all other variables
    Feature empty_feature;
    size_t IS_component_it, IS_component_group_it;

    // // iterate through the unknowns
    // for (size_t i = 0; i < unknowns.size(); i++)
    // {

    // iterate through each component_group/feature
    for (size_t feature_it = 0; feature_it < unknowns.size(); ++feature_it)
    {
      String component_group_name = (String)unknowns[feature_it].getMetaValue("PeptideRef");
      Feature unknowns_quant_feature;

      // iterate through each component/sub-feature
      for (size_t sub_it = 0; sub_it < unknowns[feature_it].getSubordinates().size(); ++sub_it)
      {
        String component_name = (String)unknowns[feature_it].getSubordinates()[sub_it].getMetaValue("native_id");

        // apply the calibration curve to components that are in the quant_method
        if (quant_methods_.count(component_name)>0)
        {
          double calculated_concentration = 0.0;
          std::map<String,AbsoluteQuantitationMethod>::iterator quant_methods_it = quant_methods_.find(component_name);
          String quant_component_name = quant_methods_it->second.getComponentName();
          String quant_IS_component_name = quant_methods_it->second.getISName();
          String quant_feature_name = quant_methods_it->second.getFeatureName();
          if (quant_IS_component_name != "")
          {
            // look up the internal standard for the component
            bool IS_found = false;
            // Optimization: 90% of the IS will be in the same component_group/feature
            for (size_t is_sub_it = 0; is_sub_it < unknowns[feature_it].getSubordinates().size(); ++is_sub_it)
            {
              String IS_component_name = (String)unknowns[feature_it].getSubordinates()[is_sub_it].getMetaValue("native_id");
              if (quant_IS_component_name == IS_component_name)
              {
                IS_found = true;
                IS_component_group_it = feature_it;
                IS_component_it = is_sub_it;
                break;
              }
            }
            if (!IS_found)
            {// expand IS search to all components
              // iterate through each component_group/feature
              for (size_t is_feature_it = 0; is_feature_it < unknowns.size(); ++is_feature_it)
              {
                //iterate through each component/sub-feature
                for (size_t is_sub_it = 0; is_sub_it < unknowns[is_feature_it].getSubordinates().size(); ++is_sub_it)
                {
                  String IS_component_name = (String)unknowns[is_feature_it].getSubordinates()[is_sub_it].getMetaValue("native_id");
                  if (quant_IS_component_name == IS_component_name)
                  {
                    IS_found = true;
                    IS_component_group_it = is_feature_it;
                    IS_component_it = is_sub_it;
                    break;
                  }
                }
                if (IS_found)
                {
                  break;
                }
              }
            }
            if (IS_found)
            {
              String transformation_model = quant_methods_it->second.getTransformationModel();
              Param transformation_model_params = quant_methods_it->second.getTransformationModelParams();
              calculated_concentration = applyCalibration(
                unknowns[feature_it].getSubordinates()[sub_it],
                unknowns[IS_component_group_it].getSubordinates()[IS_component_it],
                quant_feature_name,transformation_model,transformation_model_params);
            }
            else
            {
              LOG_INFO << "Component " << component_name << " IS " << quant_IS_component_name << " was not found.";
              LOG_INFO << "No concentration will be calculated.";
            }
          }
          else
          {
            String transformation_model = quant_methods_it->second.getTransformationModel();
            Param transformation_model_params = quant_methods_it->second.getTransformationModelParams();
            calculated_concentration = applyCalibration(
              unknowns[feature_it].getSubordinates()[sub_it],
              empty_feature,
              quant_feature_name,transformation_model,transformation_model_params);
          }

          // add new metadata (calculated_concentration, concentration_units) to the component
          unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("calculated_concentration",calculated_concentration);
          String concentration_units = quant_methods_it->second.getConcentrationUnits();
          unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("concentration_units",concentration_units);
          // calculate the bias?
        }
        else
        {
          LOG_INFO << "Component " << component_name << " does not have a quantitation method.";
          LOG_INFO << "No concentration will be calculated.";
          unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("calculated_concentration","");
          unknowns[feature_it].getSubordinates()[sub_it].setMetaValue("concentration_units","");
        }
      }
    }
    // }
  }
Exemplo n.º 18
0
  ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm,
                         bool labeled = false)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    StringList ins;
    if (labeled) ins.push_back(getStringOption_("in"));
    else ins = getStringList_("in");
    String out = getStringOption_("out");

    //-------------------------------------------------------------
    // check for valid input
    //-------------------------------------------------------------
    // check if all input files have the correct type
    FileTypes::Type file_type = FileHandler::getType(ins[0]);
    for (Size i = 0; i < ins.size(); ++i)
    {
      if (FileHandler::getType(ins[i]) != file_type)
      {
        writeLog_("Error: All input files must be of the same type!");
        return ILLEGAL_PARAMETERS;
      }
    }

    //-------------------------------------------------------------
    // set up algorithm
    //-------------------------------------------------------------
    Param algorithm_param = getParam_().copy("algorithm:", true);
    writeDebug_("Used algorithm parameters", algorithm_param, 3);
    algorithm->setParameters(algorithm_param);

    //-------------------------------------------------------------
    // perform grouping
    //-------------------------------------------------------------
    // load input
    ConsensusMap out_map;
    StringList ms_run_locations;
    if (file_type == FileTypes::FEATUREXML)
    {
      vector<ConsensusMap > maps(ins.size());
      FeatureXMLFile f;
      FeatureFileOptions param = f.getOptions();
      // to save memory don't load convex hulls and subordinates
      param.setLoadSubordinates(false);
      param.setLoadConvexHull(false);
      f.setOptions(param);

      Size progress = 0;
      setLogType(ProgressLogger::CMD);
      startProgress(0, ins.size(), "reading input");
      for (Size i = 0; i < ins.size(); ++i)
      {
        FeatureMap tmp;
        f.load(ins[i], tmp);
        out_map.getFileDescriptions()[i].filename = ins[i];
        out_map.getFileDescriptions()[i].size = tmp.size();
        out_map.getFileDescriptions()[i].unique_id = tmp.getUniqueId();

        // copy over information on the primary MS run
        const StringList& ms_runs = tmp.getPrimaryMSRunPath();
        ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

        // to save memory, remove convex hulls, subordinates:
        for (FeatureMap::Iterator it = tmp.begin(); it != tmp.end();
             ++it)
        {
          it->getSubordinates().clear();
          it->getConvexHulls().clear();
          it->clearMetaInfo();
        }

        MapConversion::convert(i, tmp, maps[i]);

        maps[i].updateRanges();

        setProgress(progress++);
      }
      endProgress();

      // exception for "labeled" algorithms: copy file descriptions
      if (labeled)
      {
        out_map.getFileDescriptions()[1] = out_map.getFileDescriptions()[0];
        out_map.getFileDescriptions()[0].label = "light";
        out_map.getFileDescriptions()[1].label = "heavy";
      }

      // group
      algorithm->group(maps, out_map);
    }
    else
    {
      vector<ConsensusMap> maps(ins.size());
      ConsensusXMLFile f;
      for (Size i = 0; i < ins.size(); ++i)
      {
        f.load(ins[i], maps[i]);
        maps[i].updateRanges();
        // copy over information on the primary MS run
        const StringList& ms_runs = maps[i].getPrimaryMSRunPath();
        ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
      }
      // group
      algorithm->group(maps, out_map);

      // set file descriptions:
      bool keep_subelements = getFlag_("keep_subelements");
      if (!keep_subelements)
      {
        for (Size i = 0; i < ins.size(); ++i)
        {
          out_map.getFileDescriptions()[i].filename = ins[i];
          out_map.getFileDescriptions()[i].size = maps[i].size();
          out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
        }
      }
      else
      {
        // components of the output map are not the input maps themselves, but
        // the components of the input maps:
        algorithm->transferSubelements(maps, out_map);
      }
    }

    // assign unique ids
    out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

    // annotate output with data processing info
    addDataProcessing_(out_map,
                       getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

    // set primary MS runs
    out_map.setPrimaryMSRunPath(ms_run_locations);

    // write output
    ConsensusXMLFile().store(out, out_map);

    // some statistics
    map<Size, UInt> num_consfeat_of_size;
    for (ConsensusMap::const_iterator cmit = out_map.begin();
         cmit != out_map.end(); ++cmit)
    {
      ++num_consfeat_of_size[cmit->size()];
    }

    LOG_INFO << "Number of consensus features:" << endl;
    for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin();
         i != num_consfeat_of_size.rend(); ++i)
    {
      LOG_INFO << "  of size " << setw(2) << i->first << ": " << setw(6) 
               << i->second << endl;
    }
    LOG_INFO << "  total:      " << setw(6) << out_map.size() << endl;

    return EXECUTION_OK;
  }
Exemplo n.º 19
0
START_SECTION((FeatureMap& operator = (const FeatureMap& rhs)))
	FeatureMap<> map1;
	map1.push_back(feature1);
	map1.push_back(feature2);
	map1.push_back(feature3);
	map1.updateRanges();
	map1.setIdentifier("lsid");
	map1.getDataProcessing().resize(1);
	map1.getProteinIdentifications().resize(1);
	map1.getUnassignedPeptideIdentifications().resize(1);

	//assignment
	FeatureMap<> map2;
	map2 = map1;

	TEST_EQUAL(map2.size(),3);
  TEST_REAL_SIMILAR(map2.getMaxInt(),1.0)
  TEST_STRING_EQUAL(map2.getIdentifier(),"lsid")
  TEST_EQUAL(map2.getDataProcessing().size(),1)
	TEST_EQUAL(map2.getProteinIdentifications().size(),1);
	TEST_EQUAL(map2.getUnassignedPeptideIdentifications().size(),1);

  //assignment of empty object
   map2 = FeatureMap<>();

	TEST_EQUAL(map2.size(),0);
	TEST_REAL_SIMILAR(map2.getMinInt(), numeric_limits<DoubleReal>::max())
	TEST_REAL_SIMILAR(map2.getMaxInt(), -numeric_limits<DoubleReal>::max())
  TEST_STRING_EQUAL(map2.getIdentifier(),"")
  TEST_EQUAL(map2.getDataProcessing().size(),0)
	TEST_EQUAL(map2.getProteinIdentifications().size(),0);
Exemplo n.º 20
0
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    String in = getStringOption_("in");
    String out = getStringOption_("out");

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    MzMLFile mzMLFile;
    mzMLFile.setLogType(log_type_);
    MSExperiment<Peak1D> input;
    mzMLFile.getOptions().addMSLevel(1);
    mzMLFile.load(in, input);

    if (input.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry.";
      return INCOMPATIBLE_INPUT_DATA;
    }

    //check if spectra are sorted
    for (Size i = 0; i < input.size(); ++i)
    {
      if (!input[i].isSorted())
      {
        writeLog_("Error: Not all spectra are sorted according to peak m/z positions. Use FileFilter to sort the input!");
        return INCOMPATIBLE_INPUT_DATA;
      }
    }

    //-------------------------------------------------------------
    // pick
    //-------------------------------------------------------------
    FeatureMap<> output;

    FeatureFinder ff;
    Param param = getParam_().copy("algorithm:", true);

    FFSH ffsh;
    ffsh.setParameters(param);
    ffsh.setData(input, output, ff);
    ffsh.run();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    //annotate output with data processing info
    addDataProcessing_(output, getProcessingInfo_(DataProcessing::PEAK_PICKING));
    addDataProcessing_(output, getProcessingInfo_(DataProcessing::QUANTITATION));
    output.ensureUniqueId();
    for (Size i = 0; i < output.size(); i++)
    {
      output[i].ensureUniqueId();
    }
    FeatureXMLFile().store(out, output);

    return EXECUTION_OK;
  }
Exemplo n.º 21
0
  ExitCodes main_(int, const char**)
  {
    vector<ProteinIdentification> prot_ids;
    vector<PeptideIdentification> pep_ids;
    ProteinHit temp_protein_hit;

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String inputfile_id               = getStringOption_("id");
    String inputfile_feature       = getStringOption_("feature");
    String inputfile_consensus  = getStringOption_("consensus");
    String inputfile_raw            = getStringOption_("in");
    String outputfile_name       = getStringOption_("out");

    //~ bool Ms1(getFlag_("MS1"));
    //~ bool Ms2(getFlag_("MS2"));
    bool remove_duplicate_features(getFlag_("remove_duplicate_features"));
    
    //-------------------------------------------------------------
    // fetch vocabularies
    //------------------------------------------------------------
    ControlledVocabulary cv;
    cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
    cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo"));
 
     QcMLFile qcmlfile;

    //-------------------------------------------------------------
    // MS  aqiusition
    //------------------------------------------------------------
    String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName();

    cout << "Reading mzML file..." << endl;
    MzMLFile mz_data_file;
    MSExperiment<Peak1D> exp;
    MzMLFile().load(inputfile_raw, exp);
    
    //---prep input
    exp.sortSpectra();
    UInt min_mz = std::numeric_limits<UInt>::max();
    UInt max_mz = 0;
    std::map<Size, UInt> mslevelcounts;
    
    qcmlfile.registerRun(base_name,base_name); //TODO use UIDs
    
    //---base MS aquisition qp
    String msaq_ref = base_name + "_msaq";
    QcMLFile::QualityParameter qp;
    qp.id = msaq_ref; ///< Identifier
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000004";
    try
    {
      //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details");
      //~ cout << test.name << test.id << endl;
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004");
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "mzML file"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    //---file origin qp
    qp = QcMLFile::QualityParameter();
    qp.name = "mzML file"; ///< Name
    qp.id = base_name + "_run_name"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000577";
    qp.value = base_name;
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    qp = QcMLFile::QualityParameter();
    qp.name = "instrument model"; ///< Name
    qp.id = base_name + "_instrument_name"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000031";
    qp.value = exp.getInstrument().getName();
    qcmlfile.addRunQualityParameter(base_name, qp);    

    qp = QcMLFile::QualityParameter();
    qp.name = "completion time"; ///< Name
    qp.id = base_name + "_date"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000747";
    qp.value = exp.getDateTime().getDate();
    qcmlfile.addRunQualityParameter(base_name, qp);

    //---precursors at
    QcMLFile::Attachment at;
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000044";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_precursors"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "precursors"; ///< Name
    }

    at.colTypes.push_back("MS:1000894_[sec]"); //RT
    at.colTypes.push_back("MS:1000040"); //MZ
    for (Size i = 0; i < exp.size(); ++i)
    {
      mslevelcounts[exp[i].getMSLevel()]++;
      if (exp[i].getMSLevel() == 2)
      {
        if (exp[i].getPrecursors().front().getMZ() < min_mz)
        {
          min_mz = exp[i].getPrecursors().front().getMZ();
        }
        if (exp[i].getPrecursors().front().getMZ() > max_mz)
        {
          max_mz = exp[i].getPrecursors().front().getMZ();
        }
        std::vector<String> row;
        row.push_back(exp[i].getRT());
        row.push_back(exp[i].getPrecursors().front().getMZ());
        at.tableRows.push_back(row);
      }
    }
    qcmlfile.addRunAttachment(base_name, at);

    //---aquisition results qp
    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_ms1aquisition"; ///< Identifier
    qp.value = String(mslevelcounts[1]);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of ms1 spectra"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    

    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_ms2aquisition"; ///< Identifier
    qp.value = String(mslevelcounts[2]);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of ms2 spectra"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);

    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_Chromaquisition"; ///< Identifier
    qp.value = String(exp.getChromatograms().size());
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of chromatograms"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000009";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_mzrange"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS MZ aquisition ranges"; ///< Name
    }

    at.colTypes.push_back("QC:0000010"); //MZ
    at.colTypes.push_back("QC:0000011"); //MZ
    std::vector<String> rowmz;
    rowmz.push_back(String(min_mz));
    rowmz.push_back(String(max_mz));
    at.tableRows.push_back(rowmz);
    qcmlfile.addRunAttachment(base_name, at);

    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000012";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_rtrange"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS RT aquisition ranges"; ///< Name
    }

    at.colTypes.push_back("QC:0000013"); //MZ
    at.colTypes.push_back("QC:0000014"); //MZ
    std::vector<String> rowrt;
    rowrt.push_back(String(exp.begin()->getRT()));
    rowrt.push_back(String(exp.getSpectra().back().getRT()));
    at.tableRows.push_back(rowrt);
    qcmlfile.addRunAttachment(base_name, at);
    

    //---ion current stability ( & tic ) qp
    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000022";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_tics"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS TICs"; ///< Name
    }
    
    at.colTypes.push_back("MS:1000894_[sec]");
    at.colTypes.push_back("MS:1000285");
    UInt max = 0;
    Size below_10k = 0;
    for (Size i = 0; i < exp.size(); ++i)
    {
      if (exp[i].getMSLevel() == 1)
      {
        UInt sum = 0;
        for (Size j = 0; j < exp[i].size(); ++j)
        {
          sum += exp[i][j].getIntensity();
        }
        if (sum > max)
        {
          max = sum;
        }
        if (sum < 10000)
        {
          ++below_10k;
        }
        std::vector<String> row;
        row.push_back(exp[i].getRT());
        row.push_back(sum);
        at.tableRows.push_back(row);
      }
    }
    qcmlfile.addRunAttachment(base_name, at);
    

    qp = QcMLFile::QualityParameter();
    qp.id = base_name + "_ticslump"; ///< Identifier
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000023";
    qp.value = String((100 / exp.size()) * below_10k);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "percentage of tic slumps"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);

    
    //-------------------------------------------------------------
    // MS  id
    //------------------------------------------------------------
    if (inputfile_id != "")
    {
      IdXMLFile().load(inputfile_id, prot_ids, pep_ids);
      cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl;

      ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters();
      vector<String> var_mods = params.variable_modifications;
      //~ boost::regex re("(?<=[KR])(?=[^P])");
     
      String msid_ref = base_name + "_msid";
      QcMLFile::QualityParameter qp;
      qp.id = msid_ref; ///< Identifier
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000025";
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "MS identification result details"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000026";
      at.qualityRef = msid_ref;
      at.id = base_name + "_idsetting"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "MS id settings"; ///< Name
      }
      
      at.colTypes.push_back("MS:1001013"); //MS:1001013 db name  MS:1001016 version  MS:1001020 taxonomy
      at.colTypes.push_back("MS:1001016");
      at.colTypes.push_back("MS:1001020");
      std::vector<String> row;
      row.push_back(String(prot_ids.front().getSearchParameters().db));
      row.push_back(String(prot_ids.front().getSearchParameters().db_version));
      row.push_back(String(prot_ids.front().getSearchParameters().taxonomy));
      at.tableRows.push_back(row);
      qcmlfile.addRunAttachment(base_name, at);


      UInt spectrum_count = 0;
      Size peptide_hit_count = 0;
      UInt runs_count = 0;
      Size protein_hit_count = 0;
      set<String> peptides;
      set<String> proteins;
      Size missedcleavages = 0;
      for (Size i = 0; i < pep_ids.size(); ++i)
      {
        if (!pep_ids[i].empty())
        {
          ++spectrum_count;
          peptide_hit_count += pep_ids[i].getHits().size();
          const vector<PeptideHit>& temp_hits = pep_ids[i].getHits();
          for (Size j = 0; j < temp_hits.size(); ++j)
          {
            peptides.insert(temp_hits[j].getSequence().toString());
          }
        }
      }
      for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it)
      {
        for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st)
        {
          if (*st == 'K' || *st == 'R')
          {
            ++missedcleavages;
          }
        }
      }

      for (Size i = 0; i < prot_ids.size(); ++i)
      {
        ++runs_count;
        protein_hit_count += prot_ids[i].getHits().size();
        const vector<ProteinHit>& temp_hits = prot_ids[i].getHits();
        for (Size j = 0; j < temp_hits.size(); ++j)
        {
          proteins.insert(temp_hits[j].getAccession());
        }
      }
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000037"; ///< cv accession
      qp.id = base_name + "_misscleave"; ///< Identifier
      qp.value = missedcleavages;
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "total number of missed cleavages"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);

      
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000032"; ///< cv accession
      qp.id = base_name + "_totprot"; ///< Identifier
      qp.value = protein_hit_count;
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "total number of identified proteins"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000033"; ///< cv accession
      qp.id = base_name + "_totuniqprot"; ///< Identifier
      qp.value = String(proteins.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of uniquely identified proteins"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000029"; ///< cv accession
      qp.id = base_name + "_psms"; ///< Identifier
      qp.value = String(spectrum_count);
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of PSM"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000030"; ///< cv accession
      qp.id = base_name + "_totpeps"; ///< Identifier
      qp.value = String(peptide_hit_count);
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of identified peptides"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000031"; ///< cv accession
      qp.id = base_name + "_totuniqpeps"; ///< Identifier
      qp.value = String(peptides.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of uniquely identified peptides"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000038";
      at.qualityRef = msid_ref;
      at.id = base_name + "_massacc"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "delta ppm tables";
      }
      
      //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M)
      at.colTypes.push_back("RT");
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("Score");
      at.colTypes.push_back("PeptideSequence");
      at.colTypes.push_back("Charge");
      at.colTypes.push_back("TheoreticalWeight");
      at.colTypes.push_back("delta_ppm");
      for (UInt w = 0; w < var_mods.size(); ++w)
      {
        at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_'));
      }

      std::vector<double> deltas;
      //~ prot_ids[0].getSearchParameters();
      for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it)
      {
        if (it->getHits().size() > 0)
        {
          std::vector<String> row;
          row.push_back(it->getRT());
          row.push_back(it->getMZ());
          PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort
          vector<UInt> pep_mods;
          for (UInt w = 0; w < var_mods.size(); ++w)
          {
            pep_mods.push_back(0);
          }
          for (AASequence::ConstIterator z =  tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z)
          {
            Residue res = *z;
            String temp;
            if (res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl")
            {
              temp = res.getModification() + " (" + res.getOneLetterCode()  + ")";
              //cout<<res.getModification()<<endl;
              for (UInt w = 0; w < var_mods.size(); ++w)
              {
                if (temp == var_mods[w])
                {
                  //cout<<temp;
                  pep_mods[w] += 1;
                }
              }
            }
          }
          row.push_back(tmp.getScore());
          row.push_back(tmp.getSequence().toString().removeWhitespaces());
          row.push_back(tmp.getCharge());
          row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()));
          double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true));
          row.push_back(String(dppm));
          deltas.push_back(dppm);
          for (UInt w = 0; w < var_mods.size(); ++w)
          {
            row.push_back(pep_mods[w]);
          }
          at.tableRows.push_back(row);
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
      

      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000040"; ///< cv accession
      qp.id = base_name + "_mean_delta"; ///< Identifier
      qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end()));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "mean delta ppm"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000041"; ///< cv accession
      qp.id = base_name + "_median_delta"; ///< Identifier
      qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "median delta ppm"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000035"; ///< cv accession
      qp.id = base_name + "_ratio_id"; ///< Identifier
      qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2]));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "id ratio"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);
    }

    //-------------------------------------------------------------
    // MS quantitation
    //------------------------------------------------------------
    FeatureMap map;
    String msqu_ref = base_name + "_msqu";
    if (inputfile_feature != "")
    {
      FeatureXMLFile f;
      f.load(inputfile_feature, map);

      cout << "Read featureXML file..." << endl;

      //~ UInt fiter = 0;
      map.sortByRT();
      map.updateRanges();

      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000045"; ///< cv accession
      qp.id = msqu_ref; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "MS quantification result details"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);
      
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000046"; ///< cv accession
      qp.id = base_name + "_feature_count"; ///< Identifier
      qp.value = String(map.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "number of features"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);      
    }

    if (inputfile_feature != "" && !remove_duplicate_features)
    {
      
      QcMLFile::Attachment at;
      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000047";
      at.qualityRef = msqu_ref;
      at.id = base_name + "_features"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "features"; ///< Name
      }
      
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("RT");
      at.colTypes.push_back("Intensity");
      at.colTypes.push_back("Charge");
      at.colTypes.push_back("Quality");
      at.colTypes.push_back("FWHM");
      at.colTypes.push_back("IDs");
      UInt fiter = 0;
      map.sortByRT();
      //ofstream out(outputfile_name.c_str());
      while (fiter < map.size())
      {
        std::vector<String> row;
        row.push_back(map[fiter].getMZ());
        row.push_back(map[fiter].getRT());
        row.push_back(map[fiter].getIntensity());
        row.push_back(map[fiter].getCharge());
        row.push_back(map[fiter].getOverallQuality());
        row.push_back(map[fiter].getWidth());
        row.push_back(map[fiter].getPeptideIdentifications().size());
        fiter++;
        at.tableRows.push_back(row);
      }     
      qcmlfile.addRunAttachment(base_name, at);
    }
    else if (inputfile_feature != "" && remove_duplicate_features)
    {
      QcMLFile::Attachment at;
      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000047";
      at.qualityRef = msqu_ref;
      at.id = base_name + "_features"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "features"; ///< Name
      }
      
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("RT");
      at.colTypes.push_back("Intensity");
      at.colTypes.push_back("Charge");
      FeatureMap map, map_out;
      FeatureXMLFile f;
      f.load(inputfile_feature, map);
      UInt fiter = 0;
      map.sortByRT();
      while (fiter < map.size())
      {
        FeatureMap map_tmp;
        for (UInt k = fiter; k <= map.size(); ++k)
        {
          if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1)
          {
            //~ cout << fiter << endl;
            map_tmp.push_back(map[k]);
          }
          else
          {
            fiter = k;
            break;
          }
        }
        map_tmp.sortByMZ();
        UInt retif = 1;
        map_out.push_back(map_tmp[0]);
        while (retif < map_tmp.size())
        {
          if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01)
          {
            cout << "equal RT, but mass different" << endl;
            map_out.push_back(map_tmp[retif]);
          }
          retif++;
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
    }
    if (inputfile_consensus != "")
    {
      cout << "Reading consensusXML file..." << endl;
      ConsensusXMLFile f;
      ConsensusMap map;
      f.load(inputfile_consensus, map);
      //~ String CONSENSUS_NAME = "_consensus.tsv";
      //~ String combined_out = outputfile_name + CONSENSUS_NAME;
      //~ ofstream out(combined_out.c_str());

      at = QcMLFile::Attachment();
      qp.name = "consensuspoints"; ///< Name
      //~ qp.id = base_name + "_consensuses"; ///< Identifier
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results"

      at.colTypes.push_back("Native_spectrum_ID");
      at.colTypes.push_back("DECON_RT_(sec)");
      at.colTypes.push_back("DECON_MZ_(Th)");
      at.colTypes.push_back("DECON_Intensity");
      at.colTypes.push_back("Feature_RT_(sec)");
      at.colTypes.push_back("Feature_MZ_(Th)");
      at.colTypes.push_back("Feature_Intensity");
      at.colTypes.push_back("Feature_Charge");
      for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit)
      {
        const ConsensusFeature& CF = *cmit;
        for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit)
        {
          std::vector<String> row;
          FeatureHandle FH = *cfit;
          row.push_back(CF.getMetaValue("spectrum_native_id"));
          row.push_back(CF.getRT()); row.push_back(CF.getMZ());
          row.push_back(CF.getIntensity());
          row.push_back(FH.getRT());
          row.push_back(FH.getMZ());
          row.push_back(FH.getCharge());
          at.tableRows.push_back(row);
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
    }
    
    
    //-------------------------------------------------------------
    // finalize
    //------------------------------------------------------------
    qcmlfile.store(outputfile_name);
    return EXECUTION_OK;
  }
Exemplo n.º 22
0
 size_t size() {
     return feat_map.size();
 }
Exemplo n.º 23
0
  ExitCodes main_(int, const char **)
  {
    //load input features
    FeatureMap input;
    FeatureXMLFile().load(getStringOption_("in"), input);

    //load truth consensusXML
    ConsensusMap truth;
    ConsensusXMLFile().load(getStringOption_("truth"), truth);

    //parameters
    double mz_tol = getDoubleOption_("mz_tol");
    double rt_tol = getDoubleOption_("rt_tol");

    //seek manual feature in automatic feature map
    UInt matched_pairs = 0;
    UInt half_matched_pairs = 0;
    vector<double> t_ratio, i_ratio, rt_diffs, mz_diffs;
    for (Size t = 0; t < truth.size(); ++t)
    {
      if (truth[t].size() != 2)
      {
        cerr << "Error: consensus feature must contain exactly two elements!" << endl;
        continue;
      }
      vector<Feature> best_matches(2);
      vector<UInt> match_counts(2, 0);
      vector<Peak2D> elements(2);
      elements[0] = *(truth[t].getFeatures().begin());
      elements[1] = *(++(truth[t].getFeatures().begin()));
      double mz_tol_charged = mz_tol / truth[t].getCharge();
      for (Size e = 0; e < 2; ++e)
      {
        double best_score = 0.0;
        for (Size i = 0; i < input.size(); ++i)
        {
          const Feature & f_i = input[i];
          if (fabs(f_i.getRT() - elements[e].getRT()) < rt_tol
             && fabs(f_i.getMZ() - elements[e].getMZ()) < mz_tol_charged)
          {
            ++match_counts[e];
            double score = (1.0 - fabs(f_i.getMZ() - elements[e].getMZ()) / mz_tol_charged) * (1.0 - fabs(f_i.getRT() - elements[e].getRT()) / rt_tol);
            if (score > best_score)
            {
              best_score = score;
              best_matches[e] = f_i;
            }
          }
        }
      }

      //not matched
      if (match_counts[0] == 0 && match_counts[1] == 0)
      {
      }
      //half matched
      else if ((match_counts[0] > 0 && match_counts[1] == 0) || (match_counts[0] == 0 && match_counts[1] > 0))
      {
        ++half_matched_pairs;
      }
      //matched
      else
      {
        ++matched_pairs;
        double a_r = best_matches[0].getIntensity() / best_matches[1].getIntensity();
        t_ratio.push_back(a_r);
        double m_r = elements[0].getIntensity() / elements[1].getIntensity();
        i_ratio.push_back(m_r);
        rt_diffs.push_back(best_matches[1].getRT() - best_matches[0].getRT());
        mz_diffs.push_back((best_matches[1].getMZ() - best_matches[0].getMZ()) * truth[t].getCharge());
      }
    }

    cout << endl;
    cout << "pair detection statistics:" << endl;
    cout << "==========================" << endl;
    cout << "truth pairs: " << truth.size() << endl;
    cout << "input features: " << input.size() << endl;
    cout << endl;
    cout << "found: " << matched_pairs << " (" << String::number(100.0 * matched_pairs / truth.size(), 2) << "%)" << endl;
    cout << "half found : " << half_matched_pairs << " (" << String::number(100.0 * half_matched_pairs / truth.size(), 2) << "%)" << endl;
    cout << "not found : " << truth.size() - (matched_pairs + half_matched_pairs) << " (" << String::number(100.0 - 100.0 * (matched_pairs + half_matched_pairs) / truth.size(), 2) << "%)" << endl;
    cout << endl;
    cout << "relative pair ratios: " << fiveNumberQuotients(i_ratio, t_ratio, 3) << endl;
    cout << "pair distance RT : " << fiveNumbers(rt_diffs, 2) << endl;
    cout << "pair distance m/z: " << fiveNumbers(mz_diffs, 2) << endl;

    return EXECUTION_OK;
  }
Exemplo n.º 24
0
  input.updateRanges(1);
  FeatureMap<> output;

  //parameters
  Param param;
  //param.load(OPENMS_GET_TEST_DATA_PATH("FeatureFinderAlgorithmPicked.ini"));
  param = param.copy("FeatureFinder:1:algorithm:",true);
  //Dummy featurefinder
  FeatureFinder ff;

  FFSH ffsh;
  ffsh.setParameters(param);
  ffsh.setData(input, output, ff);
  ffsh.run();

  TEST_EQUAL(output.size(), 384);
	
	//TOLERANCE_ABSOLUTE(0.001);
	//TEST_REAL_SIMILAR(output[0].getOverallQuality(),0.8819);
  //TEST_REAL_SIMILAR(output[1].getOverallQuality(),0.8673);
  // ...
	
	//TOLERANCE_ABSOLUTE(20.0);
	TEST_REAL_SIMILAR(output[0].getIntensity(),20829);
	TEST_REAL_SIMILAR(output[1].getIntensity(),56818.6);
  // ...

	TEST_REAL_SIMILAR(output[0].getMZ(),300.060882568359);
	TEST_REAL_SIMILAR(output[1].getMZ(),300.060882568359);

  TEST_REAL_SIMILAR(output[0].getRT(),35.1000317866759);
Exemplo n.º 25
0
}
END_SECTION

START_SECTION(virtual ~KroenikFile())
{
	delete ptr;
}
END_SECTION


START_SECTION((template < typename FeatureMapType > void load(const String &filename, FeatureMapType &feature_map)))
{
  KroenikFile f;
  FeatureMap fm;
  f.load(OPENMS_GET_TEST_DATA_PATH("KroenikFile_test_1.krf"), fm);
  TEST_EQUAL(fm.size(),3)
  ABORT_IF(fm.size()!=3)
  TEST_EQUAL(fm[0].getRT(), 63.2)
  TEST_REAL_SIMILAR(fm[0].getMZ(), 1002.11)
  TEST_EQUAL(fm[0].getIntensity(), 999999)
  TEST_EQUAL(fm[0].getCharge(), 1)
  TEST_EQUAL(String(fm[0].getMetaValue("AveragineModifications")), String("Carbamido"))
  TEST_EQUAL(fm[1].getRT(),  62.2)
  TEST_REAL_SIMILAR(fm[1].getMZ(), 252.057	)
  TEST_EQUAL(fm[1].getIntensity(), 9999)
  TEST_EQUAL(fm[1].getCharge(), 2)
  TEST_EQUAL(String(fm[1].getMetaValue("AveragineModifications")), String("Carbamido2"))

  

  TEST_EXCEPTION(Exception::ParseError, f.load(OPENMS_GET_TEST_DATA_PATH("KroenikFile_test_2.krf"), fm));
Exemplo n.º 26
0
	iw->identifyCharge (*spec, map[0], 0, 0, 0, false);
	NOT_TESTABLE
END_SECTION

START_SECTION(void updateBoxStates(const MSExperiment< PeakType > &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1))
	iw->updateBoxStates(map, INT_MAX, 0, 0);
	NOT_TESTABLE
END_SECTION

START_SECTION((virtual std::multimap<double, Box> getClosedBoxes ()))
	TEST_EQUAL (iw->getClosedBoxes().size(), 1)
END_SECTION

START_SECTION(FeatureMap< Feature > mapSeeds2Features(const MSExperiment< PeakType > &map, const UInt RT_votes_cutoff))
	FeatureMap f = iw->mapSeeds2Features(map, 0);
	TEST_EQUAL (f.size(), 1)
END_SECTION

START_SECTION(void mergeFeatures(IsotopeWaveletTransform< PeakType > *later_iwt, const UInt RT_interleave, const UInt RT_votes_cutoff))
	NOT_TESTABLE //only via CUDA
END_SECTION

START_SECTION(double getLinearInterpolation(const typename MSSpectrum< PeakType >::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum< PeakType >::const_iterator &right_iter))
	TEST_EQUAL((int)(iw->getLinearInterpolation(map[0].begin(), 1420.02, (map[0].begin()+1))*10),5)
END_SECTION

START_SECTION(double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b))
	TEST_EQUAL(iw->getLinearInterpolation(1,1, 1.5, 2, 2), 1.5)
END_SECTION

}
END_SECTION

START_SECTION(virtual ~SpecArrayFile())
{
	delete ptr;
}
END_SECTION


START_SECTION((template < typename FeatureMapType > void load(const String &filename, FeatureMapType &feature_map)))
{
  SpecArrayFile f;
  FeatureMap fm;
  f.load(OPENMS_GET_TEST_DATA_PATH("SpecArrayFile_test_1.peplist"), fm);
  TEST_EQUAL(fm.size(),2)
  ABORT_IF(fm.size()!=2)
  TEST_EQUAL(fm[0].getRT(), 60.1*60)
  TEST_REAL_SIMILAR(fm[0].getMZ(), 500.1)
  TEST_EQUAL(fm[0].getIntensity(), 4343534)
  TEST_EQUAL(fm[0].getCharge(), 5)
  TEST_EQUAL(double(fm[0].getMetaValue("s/n")), 3.2)
  TEST_EQUAL(fm[1].getRT(),  40.1*60)
  TEST_REAL_SIMILAR(fm[1].getMZ(), 700.1	)
  TEST_EQUAL(fm[1].getIntensity(), 222432)
  TEST_EQUAL(fm[1].getCharge(), 3)
  TEST_EQUAL(double(fm[1].getMetaValue("s/n")), 2.2)

  

  TEST_EXCEPTION(Exception::ParseError, f.load(OPENMS_GET_TEST_DATA_PATH("SpecArrayFile_test_2.peplist"), fm));
sl.push_back("xml-stylesheet");
sl.push_back("<featureMap");
sl.push_back("<feature id");
fsc.setWhitelist(sl);

//std::cout << "\n\n" << fsc.compareStrings("529090", "529091") << "\n\n\n";

START_SECTION((void run(std::vector< MassTrace > &, FeatureMap &, chromatograms &)))
{
    FeatureFindingMetabo test_ffm;
    // run with non-default setting (C13 isotope distance)
    Param p = test_ffm.getParameters();
    p.setValue("mz_scoring_13C", "true");
    test_ffm.setParameters(p);
    test_ffm.run(splitted_mt, test_fm, chromatograms);
    TEST_EQUAL(test_fm.size(), 93);

    // run with default settings (from paper using charge+isotope# dependent distances)
    p.setValue("report_convex_hulls", "true");
    p.setValue("mz_scoring_13C", "false");
    test_ffm.setParameters(p);
    test_ffm.run(splitted_mt, test_fm, chromatograms);
    TEST_EQUAL(test_fm.size(), 91);
    // --> this gives less features, i.e. more isotope clusters (but the input data is simulated and highly weird -- should be replaced at some point)

    // test annotation of input
    String tmp_file;
    NEW_TMP_FILE(tmp_file);
    FeatureXMLFile().store(tmp_file, test_fm);
    TEST_EQUAL(fsc.compareFiles(tmp_file, OPENMS_GET_TEST_DATA_PATH("FeatureFindingMetabo_output1.featureXML")), true);