예제 #1
0
  void FeatureFinder::run(const String& algorithm_name, PeakMap& input_map, FeatureMap& features, const Param& param, const FeatureMap& seeds)
  {
    // Nothing to do if there is no data
    if ((algorithm_name != "mrm" && input_map.empty()) || (algorithm_name == "mrm" && input_map.getChromatograms().empty()))
    {
      features.clear(true);
      return;
    }

    // check input
    {
      // We need updated ranges => check number of peaks
      if (algorithm_name != "mrm" && input_map.getSize() == 0)
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder needs updated ranges on input map. Aborting.");
      }

      // We need MS1 data only => check levels
      if (algorithm_name != "mrm" && (input_map.getMSLevels().size() != 1 || input_map.getMSLevels()[0] != 1))
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data. Aborting.");
      }

      //Check if the peaks are sorted according to m/z
      if (!input_map.isSorted(true))
      {
        LOG_WARN << "Input map is not sorted by RT and m/z! This is done now, before applying the algorithm!" << std::endl;
        input_map.sortSpectra(true);
        input_map.sortChromatograms(true);
      }
      for (Size s = 0; s < input_map.size(); ++s)
      {
        if (input_map[s].empty())
          continue;
        if (input_map[s][0].getMZ() < 0)
        {
          throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on spectra that contain peaks with positive m/z values. Filter the data accordingly beforehand! Aborting.");
        }
      }
    }

    // initialize
    if (algorithm_name != "mrm" && algorithm_name != "centroided")
    {
      // Resize peak flag vector
      flags_.resize(input_map.size());
      for (Size i = 0; i < input_map.size(); ++i)
      {
        flags_[i].assign(input_map[i].size(), UNUSED);
      }
    }

    // do the work
    if (algorithm_name != "none")
    {
      FeatureFinderAlgorithm* algorithm = Factory<FeatureFinderAlgorithm>::create(algorithm_name);
      algorithm->setParameters(param);
      algorithm->setData(input_map, features, *this);
      algorithm->setSeeds(seeds);
      algorithm->run();
      delete(algorithm);
    }

    if (algorithm_name != "mrm") // mrm  works on chromatograms; the next section is only for conventional data
    {
      //report RT apex spectrum index and native ID for each feature
      for (Size i = 0; i < features.size(); ++i)
      {
        //index
        Size spectrum_index = input_map.RTBegin(features[i].getRT()) - input_map.begin();
        features[i].setMetaValue("spectrum_index", spectrum_index);
        //native id
        if (spectrum_index < input_map.size())
        {
          String native_id = input_map[spectrum_index].getNativeID();
          features[i].setMetaValue("spectrum_native_id", native_id);
        }
        else
        {
          /// @todo that happens sometimes using IsotopeWaveletFeatureFinder (Rene, Marc, Andreas, Clemens)
          std::cerr << "FeatureFinderAlgorithm_impl, line=" << __LINE__ << "; FixMe this cannot be, but happens" << std::endl;
        }
      }
    }
  }
예제 #2
0
  /**
    @brief Tool entry point: aligns every input map against one reference map.

    Workflow:
    - validate parameters via TOPPMapAlignerBase::checkParameters_()
    - configure the pose clustering algorithm from the "algorithm:" parameter section
    - determine the reference: an explicit "reference:file", a 1-based "reference:index",
      or (index 0) the input file with the largest size
    - load the reference and hand it to the algorithm
    - for each input file (in parallel when built with OpenMP): load it, fit an
      "identity" transformation if it is the reference or align it otherwise, then
      store the transformed map (if "out" is given) and the transformation (if
      "trafo_out" is given)

    @return The result of checkParameters_() if it is not EXECUTION_OK, otherwise EXECUTION_OK
  */
  ExitCodes main_(int, const char**) override
  {
    const ExitCodes param_status = TOPPMapAlignerBase::checkParameters_();
    if (param_status != EXECUTION_OK)
    {
      return param_status;
    }

    // configure the alignment algorithm
    MapAlignmentAlgorithmPoseClustering algorithm;
    Param algorithm_settings = getParam_().copy("algorithm:", true);
    algorithm.setParameters(algorithm_settings);
    algorithm.setLogType(log_type_);

    // file lists
    StringList inputs = getStringList_("in");
    StringList map_outputs = getStringList_("out");
    StringList trafo_outputs = getStringList_("trafo_out");

    // reference selection parameters
    Size ref_pos = getIntOption_("reference:index");
    String ref_param_file = getStringOption_("reference:file");

    FileTypes::Type input_type = FileHandler::getType(inputs[0]);
    String ref_path;
    if (ref_param_file.empty())
    {
      if (ref_pos > 0) // one of the inputs is the reference (index was checked before)
      {
        // the parameter is 1-based, the file list is 0-based
        --ref_pos;
        ref_path = inputs[ref_pos];
      }
      else if (ref_pos == 0) // no reference given
      {
        LOG_INFO << "Picking a reference (by size) ..." << std::flush;
        // the input with the largest size becomes the reference
        Size largest(0);
        FeatureXMLFile size_probe;
        for (Size idx = 0; idx < inputs.size(); ++idx)
        {
          Size current = 0;
          if (input_type == FileTypes::FEATUREXML)
          {
            current = size_probe.loadSize(inputs[idx]);
          }
          else if (input_type == FileTypes::MZML) // expensive: the whole file is loaded
          {
            PeakMap peaks;
            MzMLFile().load(inputs[idx], peaks);
            peaks.updateRanges(1);
            current = peaks.getSize();
          }
          if (current > largest)
          {
            largest = current;
            ref_pos = idx;
          }
        }
        LOG_INFO << " done" << std::endl;
        ref_path = inputs[ref_pos];
      }
    }
    else
    {
      // external reference file: no input file is the reference
      ref_path = ref_param_file;
      ref_pos = inputs.size(); // points to invalid index
    }

    // load options shared by all featureXML inputs
    FeatureXMLFile fxml_settings;
    if (map_outputs.empty())
    {
      // maps are not written back, so convex hulls and subordinates are not loaded
      fxml_settings.getOptions().setLoadConvexHull(false);
      fxml_settings.getOptions().setLoadSubordinates(false);
    }

    // load the reference and pass it to the algorithm
    if (input_type == FileTypes::FEATUREXML)
    {
      FeatureMap reference_features;
      FeatureXMLFile reference_loader; // for the reference, convex hulls and subordinates are never needed
      reference_loader.getOptions().setLoadConvexHull(false);
      reference_loader.getOptions().setLoadSubordinates(false);
      reference_loader.load(ref_path, reference_features);
      algorithm.setReference(reference_features);
    }
    else if (input_type == FileTypes::MZML)
    {
      PeakMap reference_peaks;
      MzMLFile().load(ref_path, reference_peaks);
      algorithm.setReference(reference_peaks);
    }

    ProgressLogger progress_logger;
    progress_logger.setLogType(log_type_);
    progress_logger.startProgress(0, inputs.size(), "Aligning input maps");
    Size finished_files(0); // only modified inside the critical section below

    // loop invariants
    const int file_count = static_cast<int>(inputs.size());
    const int ref_pos_int = static_cast<int>(ref_pos);
    const bool write_maps = !map_outputs.empty();
    const bool write_trafos = !trafo_outputs.empty();

    // TODO: it should all work on featureXML files, since we might need them for output anyway. Converting to consensusXML is just wasting memory!
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
    for (int file_idx = 0; file_idx < file_count; ++file_idx)
    {
      const bool is_reference = (file_idx == ref_pos_int);
      TransformationDescription trafo;

      if (input_type == FileTypes::FEATUREXML)
      {
        FeatureMap features;
        // each iteration uses its own FeatureXMLFile (it is not thread-safe and has
        // no copy c'tor, so OMP-firstprivate cannot be used); only the options are copied
        FeatureXMLFile fxml_local;
        fxml_local.getOptions() = fxml_settings.getOptions();
        fxml_local.load(inputs[file_idx], features);

        if (is_reference)
        {
          trafo.fitModel("identity");
        }
        else
        {
          algorithm.align(features, trafo);
        }

        if (write_maps)
        {
          MapAlignmentTransformer::transformRetentionTimes(features, trafo);
          // annotate output with data processing info
          addDataProcessing_(features, getProcessingInfo_(DataProcessing::ALIGNMENT));
          fxml_local.store(map_outputs[file_idx], features);
        }
      }
      else if (input_type == FileTypes::MZML)
      {
        PeakMap peaks;
        MzMLFile().load(inputs[file_idx], peaks);

        if (is_reference)
        {
          trafo.fitModel("identity");
        }
        else
        {
          algorithm.align(peaks, trafo);
        }

        if (write_maps)
        {
          MapAlignmentTransformer::transformRetentionTimes(peaks, trafo);
          // annotate output with data processing info
          addDataProcessing_(peaks, getProcessingInfo_(DataProcessing::ALIGNMENT));
          MzMLFile().store(map_outputs[file_idx], peaks);
        }
      }

      if (write_trafos)
      {
        TransformationXMLFile().store(trafo_outputs[file_idx], trafo);
      }

#ifdef _OPENMP
#pragma omp critical (MAPose_Progress)
#endif
      {
        // the counter is shared between threads, hence the critical section
        ++finished_files;
        progress_logger.setProgress(finished_files);
      }
    }

    progress_logger.endProgress();
    return EXECUTION_OK;
  }