Esempio n. 1
0
int
ProgressLogger::get_depth() const
{
  int depth = 1;
  for(ProgressLogger* i = get_subtask(); i != NULL; i = i->get_subtask())
    depth += 1;
  return depth;
}
Esempio n. 2
0
 void storeTransformationDescriptions_(const vector<TransformationDescription>&
                                       transformations, StringList& trafos)
 {
   // custom progress logger for this task:
   ProgressLogger progresslogger;
   progresslogger.setLogType(log_type_);
   progresslogger.startProgress(0, trafos.size(), 
                                "writing transformation files");
   for (Size i = 0; i < transformations.size(); ++i)
   {
     TransformationXMLFile().store(trafos[i], transformations[i]);
   }
   progresslogger.endProgress();
 }
Esempio n. 3
0
 void loadInitialMaps_(vector<MapType>& maps, StringList& ins, 
                       FileType& input_file)
 {
   // custom progress logger for this task:
   ProgressLogger progresslogger;
   progresslogger.setLogType(TOPPMapAlignerBase::log_type_);
   progresslogger.startProgress(0, ins.size(), "loading input files");
   for (Size i = 0; i < ins.size(); ++i)
   {
     progresslogger.setProgress(i);
     input_file.load(ins[i], maps[i]);
   }
   progresslogger.endProgress();
 }
Esempio n. 4
0
 void storeTransformedMaps_(vector<MapType>& maps, StringList& outs, 
                            FileType& output_file)
 {
   // custom progress logger for this task:
   ProgressLogger progresslogger;
   progresslogger.setLogType(log_type_);
   progresslogger.startProgress(0, outs.size(), "writing output files");
   for (Size i = 0; i < outs.size(); ++i)
   {
     progresslogger.setProgress(i);
     // annotate output with data processing info:
     addDataProcessing_(maps[i], 
                        getProcessingInfo_(DataProcessing::ALIGNMENT));
     output_file.store(outs[i], maps[i]);
   }
   progresslogger.endProgress();
 }
 void ConsensusMapNormalizerAlgorithmThreshold::normalizeMaps(ConsensusMap& map, const vector<double>& ratios)
 {
   ConsensusMap::Iterator cf_it;
   ProgressLogger progresslogger;
   progresslogger.setLogType(ProgressLogger::CMD);
   progresslogger.startProgress(0, map.size(), "normalizing maps");
   for (cf_it = map.begin(); cf_it != map.end(); ++cf_it)
   {
     progresslogger.setProgress(cf_it - map.begin());
     ConsensusFeature::HandleSetType::const_iterator f_it;
     for (f_it = cf_it->getFeatures().begin(); f_it != cf_it->getFeatures().end(); ++f_it)
     {
       f_it->asMutable().setIntensity(f_it->getIntensity() * ratios[f_it->getMapIndex()]);
     }
   }
   progresslogger.endProgress();
 }
Esempio n. 6
0
void
TLJPak::scan(const std::vector<std::string>& lst, ProgressLogger& logger)
{
  std::map<int, std::string> entry2name;

  for(std::vector<std::string>::const_iterator i = lst.begin(); i != lst.end(); ++i)
    {
      int j = lookup(*i);
      if (j != -1)
        files[j].pathname = *i;
    }

  logger.set_task_size(files.size());
  for(int i = 0; i < int(files.size()); ++i)
    {
      if (files[i].is_file() && files[i].pathname.empty())
        {
          // FIXME: Add filtering somewhere
          files[i].guesses = guess(i);
          assert(files[i].guesses.size() > 0);
          files[i].pathname = files[i].guesses.front();
          logger.println("found " + files[i].pathname + " (guessed)");
        }
      /* to much of an speed impact
      else if (!files[i].pathname.empty())
        {
        logger.println("found " + files[i].pathname);
        }
      */

      files[i].filetype = get_type(i);

      logger.set_task_status(i);
      logger.sync();
    }
}
  ExitCodes main_(int, const char**)
  {
    // parsing parameters
    String in(getStringOption_("in"));
    String feature_in(getStringOption_("feature_in"));
    String out(getStringOption_("out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

    // reading input
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    PeakMap exp;
    fh.loadExperiment(in, exp, in_type, log_type_, false, false);
    exp.sortSpectra();

    FeatureMap feature_map;
    if (feature_in != "")
    {
      FeatureXMLFile().load(feature_in, feature_map);
    }

    // calculations
    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", getIntOption_("max_charge"));
    ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    PeakMap exp2 = exp;
    exp2.clear(false);
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->size() != 0)
      {
        exp2.addSpectrum(*it);
      }
    }

    exp = exp2;
    exp.updateRanges();

    // TODO check MS2 and MS1 counts
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);
    progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      progresslogger.setProgress(exp.end() - it);
      if (it->getMSLevel() != 2)
      {
        continue;
      }
      // find first MS1 scan of the MS/MS scan
      PeakMap::Iterator ms1_it = it;
      while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
      {
        --ms1_it;
      }
      if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
      {
        writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
        continue;
      }
      if (ms1_it->size() == 0)
      {
        writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
        continue;
      }

      PeakMap::Iterator ms2_it = ms1_it;
      ++ms2_it;

      while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
      {
        // first: error checks
        if (ms2_it->getPrecursors().empty())
        {
          writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
          ++ms2_it;
          continue;
        }
        else if (ms2_it->getPrecursors().size() > 1)
        {
          writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
        }

        Precursor prec = *ms2_it->getPrecursors().begin();
        double prec_pos = prec.getMZ();

        PeakMap new_exp;
        // now excise small region from the MS1 spec for the feature finder (isotope pattern must be covered...)
        PeakSpectrum zoom_spec;
        for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
        {
          if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3)
          {
            zoom_spec.push_back(*pit);
          }
        }
        new_exp.addSpectrum(zoom_spec);
        new_exp.updateRanges();
        FeatureMap features, seeds;
        ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
        if (features.empty())
        {
          writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
          ++ms2_it;
          continue;
        }

        double max_int(numeric_limits<double>::min());
        double min_dist(numeric_limits<double>::max());
        Size max_int_feat_idx(0);

        for (Size i = 0; i != features.size(); ++i)
        {
          if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance &&
              features[i].getIntensity() > max_int)
          {
            max_int_feat_idx = i;
            max_int = features[i].getIntensity();
            min_dist = fabs(features[i].getMZ() - prec_pos);
          }
        }


        writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
        if (min_dist < precursor_mass_tolerance)
        {
          prec.setMZ(features[max_int_feat_idx].getMZ());
          prec.setCharge(features[max_int_feat_idx].getCharge());
          vector<Precursor> precs;
          precs.push_back(prec);
          ms2_it->setPrecursors(precs);
          writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
        }

        ++ms2_it;
      }
      it = --ms2_it;
    }
    progresslogger.endProgress();

    // writing output
    fh.storeExperiment(out, exp, log_type_);

    return EXECUTION_OK;
  }
Esempio n. 8
0
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    String in = getStringOption_("in");
    String out = getStringOption_("out");
    String trafo_in = getStringOption_("trafo_in");
    String trafo_out = getStringOption_("trafo_out");
    Param model_params = getParam_().copy("model:", true);
    String model_type = model_params.getValue("type");
    model_params = model_params.copy(model_type + ":", true);

    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);

    //-------------------------------------------------------------
    // check for valid input
    //-------------------------------------------------------------
    if (out.empty() && trafo_out.empty())
    {
      writeLog_("Error: Either a data or a transformation output file has to be provided (parameters 'out'/'trafo_out')");
      return ILLEGAL_PARAMETERS;
    }
    if (in.empty() != out.empty())
    {
      writeLog_("Error: Data input and output parameters ('in'/'out') must be used together");
      return ILLEGAL_PARAMETERS;
    }

    //-------------------------------------------------------------
    // apply transformation
    //-------------------------------------------------------------
    TransformationXMLFile trafoxml;
    TransformationDescription trafo;
    trafoxml.load(trafo_in, trafo);
    if (model_type != "none")
    {
      trafo.fitModel(model_type, model_params);
    }
    if (getFlag_("invert"))
    {
      trafo.invert();
    }
    if (!trafo_out.empty())
    {
      trafoxml.store(trafo_out, trafo);
    }
    if (!in.empty()) // load input
    {
      FileTypes::Type in_type = FileHandler::getType(in);
      if (in_type == FileTypes::MZML)
      {
        MzMLFile file;
        MSExperiment<> map;
        applyTransformation_(in, out, trafo, file, map);
      }
      else if (in_type == FileTypes::FEATUREXML)
      {
        FeatureXMLFile file;
        FeatureMap map;
        applyTransformation_(in, out, trafo, file, map);
      }
      else if (in_type == FileTypes::CONSENSUSXML)
      {
        ConsensusXMLFile file;
        ConsensusMap map;
        applyTransformation_(in, out, trafo, file, map);
      }
      else if (in_type == FileTypes::IDXML)
      {
        IdXMLFile file;
        vector<ProteinIdentification> proteins;
        vector<PeptideIdentification> peptides;
        file.load(in, proteins, peptides);
        bool store_original_rt = getFlag_("store_original_rt");
        MapAlignmentTransformer::transformRetentionTimes(peptides, trafo,
                                                         store_original_rt);
        // no "data processing" section in idXML
        file.store(out, proteins, peptides);
      }
    }

    return EXECUTION_OK;
  }
Esempio n. 9
0
  ExitCodes main_(int, const char**)
  {
    ExitCodes return_code = TOPPMapAlignerBase::checkParameters_();
    if (return_code != EXECUTION_OK) return return_code;

    // set up alignment algorithm:
    MapAlignmentAlgorithmIdentification algorithm;
    Param algo_params = getParam_().copy("algorithm:", true);
    algorithm.setParameters(algo_params);
    algorithm.setLogType(log_type_);

    Int reference_index = getReference_(algorithm);

    // handle in- and output files:
    StringList input_files = getStringList_("in");
    StringList output_files = getStringList_("out");
    StringList trafo_files = getStringList_("trafo_out");
    FileTypes::Type in_type = FileHandler::getType(input_files[0]);

    vector<TransformationDescription> transformations;

    //-------------------------------------------------------------
    // perform feature alignment
    //-------------------------------------------------------------
    if (in_type == FileTypes::FEATUREXML)
    {
      vector<FeatureMap> feature_maps(input_files.size());
      FeatureXMLFile fxml_file;
      if (output_files.empty())
      {
        // store only transformation descriptions, not transformed data =>
        // we can load only minimum required information:
        fxml_file.getOptions().setLoadConvexHull(false);
        fxml_file.getOptions().setLoadSubordinates(false);
      }
      loadInitialMaps_(feature_maps, input_files, fxml_file);

      performAlignment_(algorithm, feature_maps, transformations,
                        reference_index);

      if (!output_files.empty())
      {
        storeTransformedMaps_(feature_maps, output_files, fxml_file);
      }
    }

    //-------------------------------------------------------------
    // perform consensus alignment
    //-------------------------------------------------------------
    else if (in_type == FileTypes::CONSENSUSXML)
    {
      std::vector<ConsensusMap> consensus_maps(input_files.size());
      ConsensusXMLFile cxml_file;
      loadInitialMaps_(consensus_maps, input_files, cxml_file);

      performAlignment_(algorithm, consensus_maps, transformations,
                        reference_index);

      if (!output_files.empty())
      {
        storeTransformedMaps_(consensus_maps, output_files, cxml_file);
      }
    }

    //-------------------------------------------------------------
    // perform peptide alignment
    //-------------------------------------------------------------
    else if (in_type == FileTypes::IDXML)
    {
      vector<vector<ProteinIdentification> > protein_ids(input_files.size());
      vector<vector<PeptideIdentification> > peptide_ids(input_files.size());
      IdXMLFile idxml_file;
      ProgressLogger progresslogger;
      progresslogger.setLogType(log_type_);
      progresslogger.startProgress(0, input_files.size(),
                                   "loading input files");
      for (Size i = 0; i < input_files.size(); ++i)
      {
        progresslogger.setProgress(i);
        idxml_file.load(input_files[i], protein_ids[i], peptide_ids[i]);
      }
      progresslogger.endProgress();

      performAlignment_(algorithm, peptide_ids, transformations,
                        reference_index);

      if (!output_files.empty())
      {
        progresslogger.startProgress(0, output_files.size(), 
                                     "writing output files");
        for (Size i = 0; i < output_files.size(); ++i)
        {
          progresslogger.setProgress(i);
          idxml_file.store(output_files[i], protein_ids[i], peptide_ids[i]);
        }
        progresslogger.endProgress();
      }
    }

    if (!trafo_files.empty())
    {
      storeTransformationDescriptions_(transformations, trafo_files);
    }

    return EXECUTION_OK;
  }
Esempio n. 10
0
  ExitCodes main_(int, const char**) override
  {
    ExitCodes ret = TOPPMapAlignerBase::checkParameters_();
    if (ret != EXECUTION_OK) return ret;

    MapAlignmentAlgorithmPoseClustering algorithm;
    Param algo_params = getParam_().copy("algorithm:", true);
    algorithm.setParameters(algo_params);
    algorithm.setLogType(log_type_);

    StringList in_files = getStringList_("in");
    StringList out_files = getStringList_("out");
    StringList out_trafos = getStringList_("trafo_out");

    Size reference_index = getIntOption_("reference:index");
    String reference_file = getStringOption_("reference:file");

    FileTypes::Type in_type = FileHandler::getType(in_files[0]);
    String file;
    if (!reference_file.empty())
    {
      file = reference_file;
      reference_index = in_files.size(); // points to invalid index
    }
    else if (reference_index > 0) // normal reference (index was checked before)
    {
      file = in_files[--reference_index]; // ref. index is 1-based in parameters, but should be 0-based here
    }
    else if (reference_index == 0) // no reference given
    {
      LOG_INFO << "Picking a reference (by size) ..." << std::flush;
      // use map with highest number of features as reference:
      Size max_count(0);
      FeatureXMLFile f;
      for (Size i = 0; i < in_files.size(); ++i)
      {
        Size s = 0;
        if (in_type == FileTypes::FEATUREXML) 
        {
          s = f.loadSize(in_files[i]);
        }
        else if (in_type == FileTypes::MZML) // this is expensive!
        {
          PeakMap exp;
          MzMLFile().load(in_files[i], exp);
          exp.updateRanges(1);
          s = exp.getSize();
        }
        if (s > max_count)
        {
          max_count = s;
          reference_index = i;
        }
      }
      LOG_INFO << " done" << std::endl;
      file = in_files[reference_index];
    }

    FeatureXMLFile f_fxml;
    if (out_files.empty()) // no need to store featureXML, thus we can load only minimum required information
    {
      f_fxml.getOptions().setLoadConvexHull(false);
      f_fxml.getOptions().setLoadSubordinates(false);
    }
    if (in_type == FileTypes::FEATUREXML)
    {
      FeatureMap map_ref;
      FeatureXMLFile f_fxml_tmp; // for the reference, we never need CH or subordinates
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(file, map_ref);
      algorithm.setReference(map_ref);
    }
    else if (in_type == FileTypes::MZML)
    {
      PeakMap map_ref;
      MzMLFile().load(file, map_ref);
      algorithm.setReference(map_ref);
    }

    ProgressLogger plog;
    plog.setLogType(log_type_);

    plog.startProgress(0, in_files.size(), "Aligning input maps");
    Size progress(0); // thread-safe progress
    // TODO: it should all work on featureXML files, since we might need them for output anyway. Converting to consensusXML is just wasting memory!
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, 1)
#endif
    for (int i = 0; i < static_cast<int>(in_files.size()); ++i)
    {
      TransformationDescription trafo;
      if (in_type == FileTypes::FEATUREXML)
      {
        FeatureMap map;
        // workaround for loading: use temporary FeatureXMLFile since it is not thread-safe
        FeatureXMLFile f_fxml_tmp; // do not use OMP-firstprivate, since FeatureXMLFile has no copy c'tor
        f_fxml_tmp.getOptions() = f_fxml.getOptions();
        f_fxml_tmp.load(in_files[i], map);
        if (i == static_cast<int>(reference_index)) trafo.fitModel("identity");
        else algorithm.align(map, trafo);
        if (out_files.size())
        {
          MapAlignmentTransformer::transformRetentionTimes(map, trafo);
          // annotate output with data processing info
          addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT));
          f_fxml_tmp.store(out_files[i], map);
        }
      }
      else if (in_type == FileTypes::MZML)
      {
        PeakMap map;
        MzMLFile().load(in_files[i], map);
        if (i == static_cast<int>(reference_index)) trafo.fitModel("identity");
        else algorithm.align(map, trafo);
        if (out_files.size())
        {
          MapAlignmentTransformer::transformRetentionTimes(map, trafo);
          // annotate output with data processing info
          addDataProcessing_(map, getProcessingInfo_(DataProcessing::ALIGNMENT));
          MzMLFile().store(out_files[i], map);
        }
      }

      if (!out_trafos.empty())
      {
        TransformationXMLFile().store(out_trafos[i], trafo);
      }

#ifdef _OPENMP
#pragma omp critical (MAPose_Progress)
#endif
      {
        plog.setProgress(++progress); // thread safe progress counter
      }

    }

    plog.endProgress();
    return EXECUTION_OK;
  }
Esempio n. 11
0
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in(getStringOption_("in"));
    String out(getStringOption_("out"));
    Size num_spots_per_row(getIntOption_("num_spots_per_row"));
    double RT_distance(getDoubleOption_("RT_distance"));

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    PeakMap exp;
    MzMLFile f;
    f.setLogType(log_type_);
    f.load(in, exp);

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    ProgressLogger pl;
    pl.setLogType(log_type_);
    pl.startProgress(0, exp.size(), "Assigning pseudo RTs.");
    Size num_ms1(0), num_ms1_base(0), row_counter(0);
    bool row_to_reverse(false);
    double actual_RT(0);
    for (Size i = 0; i != exp.size(); ++i)
    {
      pl.setProgress(i);
      if (row_to_reverse)
      {
        actual_RT = (double)(num_ms1_base + (num_spots_per_row - row_counter)) * RT_distance;
        writeDebug_("RT=" + String(actual_RT) + " (modified, row_counter=" + String(row_counter) + ")", 1);
      }
      else
      {
        actual_RT = (double)num_ms1 * RT_distance;
        writeDebug_("RT=" + String(actual_RT), 1);
      }

      exp[i].setRT(actual_RT);

      if (exp[i].getMSLevel() == 1)
      {
        if (++row_counter >= num_spots_per_row)
        {
          row_counter = 0;
          if (row_to_reverse)
          {
            row_to_reverse = false;
          }
          else
          {
            row_to_reverse = true;
          }
        }
        ++num_ms1;
        if (!row_to_reverse)
        {
          num_ms1_base = num_ms1;
        }
      }
    }
    pl.endProgress();

    // sort the spectra according to their new RT
    exp.sortSpectra();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------


    f.store(out, exp);

    return EXECUTION_OK;
  }
  ExitCodes
  main_(int, const char**)
  {
    //-------------------------------------------------------------
    // general variables and data
    //-------------------------------------------------------------
    FileHandler fh;
    vector<PeptideIdentification> peptide_identifications;
    vector<ProteinIdentification> protein_identifications;

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    const String in = getStringOption_("in");

    ProgressLogger logger;
    logger.setLogType(ProgressLogger::CMD);
    logger.startProgress(0, 1, "Loading...");

    if (File::isDirectory(in))
    {
      const String in_directory = File::absolutePath(in).ensureLastChar('/');
      const String mz_file = getStringOption_("mz_file");
      const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide");

      UInt i = 0;
      FileHandler fh;
      FileTypes::Type type;
      MSExperiment<Peak1D> msexperiment;
      // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;)  However, now String::toInt() might throw.
      map<Int, float> num_and_rt;
      vector<String> NativeID;

      // The mz-File (if given)
      if (!mz_file.empty())
      {
        type = fh.getTypeByFileName(mz_file);
        fh.loadExperiment(mz_file, msexperiment, type);

        for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it)
        {
          String(spectra_it->getNativeID()).split('=', NativeID);
          try
          {
            num_and_rt[NativeID[1].toInt()] = spectra_it->getRT();
            // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01
          }
          catch (Exception::ConversionError& e)
          {
            writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
          }
        }
      }

      // Get list of the actual Sequest .out-Files
      StringList in_files;
      if (!File::fileList(in_directory, String("*.out"), in_files))
      {
        writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!");
      }

      // Now get to work ...
      for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it)
      {
        vector<PeptideIdentification> peptide_ids_seq;
        ProteinIdentification protein_id_seq;
        vector<double> pvalues_seq;
        vector<String> in_file_vec;

        SequestOutfile sequest_outfile;

        writeDebug_(String("Reading file ") + *in_files_it, 3);

        try
        {
          sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide);

          in_files_it->split('.', in_file_vec);

          for (Size j = 0; j < peptide_ids_seq.size(); ++j)
          {

            // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files.
            peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i);

            Int scan_number = 0;
            if (!mz_file.empty())
            {
              try
              {
                scan_number = in_file_vec[2].toInt();
                peptide_ids_seq[j].setRT(num_and_rt[scan_number]);
              }
              catch (Exception::ConversionError& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
              }
              catch (exception& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.what());
              }
              //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? semantics of mz
              const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge();
              peptide_ids_seq[j].setMZ(real_mz);
            }

            writeDebug_(String("scan: ") + String(scan_number) + String("  RT: ") + String(peptide_ids_seq[j].getRT()) + "  MZ: " + String(peptide_ids_seq[j].getMZ()) + "  Ident: " + peptide_ids_seq[j].getIdentifier(), 4);

            peptide_identifications.push_back(peptide_ids_seq[j]);
          }

          protein_id_seq.setIdentifier(*in_files_it + "_" + i);
          protein_identifications.push_back(protein_id_seq);
          ++i;
        }
        catch (Exception::ParseError& pe)
        {
          writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")");
          throw;
        }
        catch (...)
        {
          writeLog_(String("Error reading file: ") + *in_files_it);
          throw;
        }
      }

      writeDebug_("All files processed.", 3);
    } // ! directory
    else
    {
      FileTypes::Type in_type = fh.getType(in);

      if (in_type == FileTypes::PEPXML)
      {
        String exp_name = getStringOption_("mz_file");
        String orig_name =  getStringOption_("mz_name");
        bool use_precursor_data = getFlag_("use_precursor_data");

        if (exp_name.empty())
        {
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, orig_name);
        }
        else
        {
          MSExperiment<> exp;
          fh.loadExperiment(exp_name, exp);
          if (!orig_name.empty())
          {
            exp_name = orig_name;
          }
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, exp_name, exp,
                            use_precursor_data);
        }
      }
      else if (in_type == FileTypes::IDXML)
      {
        IdXMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::MZIDENTML)
      {
        LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl;
        MzIdentMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::PROTXML)
      {
        protein_identifications.resize(1);
        peptide_identifications.resize(1);
        ProtXMLFile().load(in, protein_identifications[0],
                           peptide_identifications[0]);
      }
      else if (in_type == FileTypes::OMSSAXML)
      {
        protein_identifications.resize(1);
        OMSSAXMLFile().load(in, protein_identifications[0],
                            peptide_identifications, true);
      }
      else if (in_type == FileTypes::MASCOTXML)
      {
        String scan_regex = getStringOption_("scan_regex");
        String exp_name = getStringOption_("mz_file");
        MascotXMLFile::RTMapping rt_mapping;
        if (!exp_name.empty())
        {
          PeakMap exp;
          // load only MS2 spectra:
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);
        }
        protein_identifications.resize(1);
        MascotXMLFile().load(in, protein_identifications[0],
                             peptide_identifications, rt_mapping, scan_regex);
      }
      else if (in_type == FileTypes::XML)
      {
        ProteinIdentification protein_id;
        XTandemXMLFile().load(in, protein_id, peptide_identifications);
        protein_id.setSearchEngineVersion("");
        protein_id.setSearchEngine("XTandem");
        protein_identifications.push_back(protein_id);
        String exp_name = getStringOption_("mz_file");
        if (!exp_name.empty())
        {
          PeakMap exp;
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it)
          {
            UInt id = (Int)it->getMetaValue("spectrum_id");
            --id; // native IDs were written 1-based
            if (id < exp.size())
            {
              it->setRT(exp[id].getRT());
              double pre_mz(0.0);
              if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
              it->setMZ(pre_mz);
              it->removeMetaValue("spectrum_id");
            }
            else
            {
              LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" << endl;
            }
          }
        }
      }
      else
      {
        writeLog_("Unknown input file type given. Aborting!");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
    }
    logger.endProgress();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    const String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }
    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    logger.startProgress(0, 1, "Storing...");
    if (out_type == FileTypes::PEPXML)
    {
      bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed");
      String mz_file = getStringOption_("mz_file");
      String mz_name = getStringOption_("mz_name");
      PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed);
    }
    else if (out_type == FileTypes::IDXML)
    {
      IdXMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::MZIDENTML)
    {
      MzIdentMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::FASTA)
    {
      Size count = 0;
      ofstream fasta(out.c_str(), ios::out);
      for (Size i = 0; i < peptide_identifications.size(); ++i)
      {
        for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l)
        {
          const PeptideHit& hit = peptide_identifications[i].getHits()[l];
          fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++
                << "|" << hit.getSequence().toString() << endl;
          String seq = hit.getSequence().toUnmodifiedString();
          // FASTA files should have at most 60 characters of sequence info per line
          for (Size j = 0; j < seq.size(); j += 60)
          {
            Size k = min(j + 60, seq.size());
            fasta << string(seq[j], seq[k]) << endl;
          }
        }
      }
    }
    else
    {
      writeLog_("Unsupported output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }
    logger.endProgress();


    return EXECUTION_OK;
  }
Esempio n. 13
0
  ExitCodes main_(int, const char**) override
  {
    ExitCodes return_code = TOPPMapAlignerBase::checkParameters_();
    if (return_code != EXECUTION_OK) return return_code;

    // set up alignment algorithm:
    MapAlignmentAlgorithmIdentification algorithm;
    Param algo_params = getParam_().copy("algorithm:", true);
    algorithm.setParameters(algo_params);
    algorithm.setLogType(log_type_);

    Int reference_index = getReference_(algorithm);

    // handle in- and output files:
    StringList input_files = getStringList_("in");
    StringList output_files = getStringList_("out");
    StringList trafo_files = getStringList_("trafo_out");
    FileTypes::Type in_type = FileHandler::getType(input_files[0]);

    vector<TransformationDescription> transformations;

    //-------------------------------------------------------------
    // perform feature alignment
    //-------------------------------------------------------------
    if (in_type == FileTypes::FEATUREXML)
    {
      vector<FeatureMap> feature_maps(input_files.size());
      FeatureXMLFile fxml_file;
      if (output_files.empty())
      {
        // store only transformation descriptions, not transformed data =>
        // we can load only minimum required information:
        fxml_file.getOptions().setLoadConvexHull(false);
        fxml_file.getOptions().setLoadSubordinates(false);
      }
      loadInitialMaps_(feature_maps, input_files, fxml_file);

      //-------------------------------------------------------------
      // Extract (optional) fraction identifiers and associate with featureXMLs
      //-------------------------------------------------------------
      String design_file = getStringOption_("design");

      // determine map of fractions to runs
      map<unsigned, vector<String> > frac2files;

      // TODO: check if can be put in common helper function
      if (!design_file.empty())
      {
        // parse design file and determine fractions
        ExperimentalDesign ed = ExperimentalDesignFile::load(design_file, false);

        // determine if design defines more than one fraction (note: fraction and run IDs are one-based)
        frac2files = ed.getFractionToMSFilesMapping();

        // check if all fractions have the same number of MS runs associated
        if (!ed.sameNrOfMSFilesPerFraction())
        {
          writeLog_("Error: Number of runs must match for every fraction!");
          return ILLEGAL_PARAMETERS;
        }
      }
      else // no design file given
      {
        for (Size i = 0; i != input_files.size(); ++i)
        {
          // TODO: read proper MS file name from meta data
          frac2files[1].push_back("file" + String(i)); // associate each file with fraction 1
        }
      }

      // TODO: check and handle if featureXML order differs from run order

      // perform fraction-based alignment
      if (frac2files.size() == 1) // group one fraction
      {
        performAlignment_(algorithm, feature_maps, transformations,
          reference_index);
        applyTransformations_(feature_maps, transformations);
      }
      else // group multiple fractions
      {
        for (Size i = 1; i <= frac2files.size(); ++i)
        {
          vector<FeatureMap> fraction_maps;
          vector<TransformationDescription> fraction_transformations;

          size_t n_fractions = frac2files.size();

          // TODO FRACTIONS: determine map index based on annotated MS files (getPrimaryMSRuns())
          for (size_t feature_map_index = 0; 
            feature_map_index != n_fractions; 
            ++feature_map_index)
          {
            fraction_maps.push_back(feature_maps[feature_map_index]);
          }
          performAlignment_(algorithm, fraction_maps, fraction_transformations,
            reference_index);
          applyTransformations_(fraction_maps, fraction_transformations);

          // copy into transformations and feature maps
          transformations.insert(transformations.end(), fraction_transformations.begin(), fraction_transformations.end());

          Size f = 0;
          for (size_t feature_map_index = 0; 
            feature_map_index != n_fractions; 
            ++feature_map_index,
            ++f)
          {
            feature_maps[feature_map_index].swap(fraction_maps[f]);
          }
        }
      }

      if (!output_files.empty())
      {
        storeTransformedMaps_(feature_maps, output_files, fxml_file);
      }
    }

    //-------------------------------------------------------------
    // perform consensus alignment
    //-------------------------------------------------------------
    else if (in_type == FileTypes::CONSENSUSXML)
    {
      std::vector<ConsensusMap> consensus_maps(input_files.size());
      ConsensusXMLFile cxml_file;
      loadInitialMaps_(consensus_maps, input_files, cxml_file);

      performAlignment_(algorithm, consensus_maps, transformations,
                        reference_index);
      applyTransformations_(consensus_maps, transformations);

      if (!output_files.empty())
      {
        storeTransformedMaps_(consensus_maps, output_files, cxml_file);
      }
    }

    //-------------------------------------------------------------
    // perform peptide alignment
    //-------------------------------------------------------------
    else if (in_type == FileTypes::IDXML)
    {
      vector<vector<ProteinIdentification> > protein_ids(input_files.size());
      vector<vector<PeptideIdentification> > peptide_ids(input_files.size());
      IdXMLFile idxml_file;
      ProgressLogger progresslogger;
      progresslogger.setLogType(log_type_);
      progresslogger.startProgress(0, input_files.size(),
                                   "loading input files");
      for (Size i = 0; i < input_files.size(); ++i)
      {
        progresslogger.setProgress(i);
        idxml_file.load(input_files[i], protein_ids[i], peptide_ids[i]);
      }
      progresslogger.endProgress();

      performAlignment_(algorithm, peptide_ids, transformations,
                        reference_index);
      applyTransformations_(peptide_ids, transformations);

      if (!output_files.empty())
      {
        progresslogger.startProgress(0, output_files.size(), 
                                     "writing output files");
        for (Size i = 0; i < output_files.size(); ++i)
        {
          progresslogger.setProgress(i);
          idxml_file.store(output_files[i], protein_ids[i], peptide_ids[i]);
        }
        progresslogger.endProgress();
      }
    }

    if (!trafo_files.empty())
    {
      storeTransformationDescriptions_(transformations, trafo_files);
    }

    return EXECUTION_OK;
  }
Esempio n. 14
0
  ExitCodes main_(int, const char**)
  {
    ExitCodes ret = checkParameters_();
    if (ret != EXECUTION_OK) return ret;

    MapAlignmentAlgorithmSpectrumAlignment algorithm;
    Param algo_params = getParam_().copy("algorithm:", true);
    algorithm.setParameters(algo_params);
    algorithm.setLogType(log_type_);

    StringList ins = getStringList_("in");
    StringList outs = getStringList_("out");
    StringList trafos = getStringList_("trafo_out");
    Param model_params = getParam_().copy("model:", true);
    String model_type = model_params.getValue("type");
    model_params = model_params.copy(model_type + ":", true);
    std::vector<TransformationDescription> transformations;

    //-------------------------------------------------------------
    // perform peak alignment
    //-------------------------------------------------------------
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);

    // load input
    std::vector<MSExperiment<> > peak_maps(ins.size());
    MzMLFile f;
    f.setLogType(log_type_);
    progresslogger.startProgress(0, ins.size(), "loading input files");
    for (Size i = 0; i < ins.size(); ++i)
    {
      progresslogger.setProgress(i);
      f.load(ins[i], peak_maps[i]);
    }
    progresslogger.endProgress();

    // try to align
    algorithm.align(peak_maps, transformations);
    if (model_type != "none")
    {
      for (vector<TransformationDescription>::iterator it = 
             transformations.begin(); it != transformations.end(); ++it)
      {
        it->fitModel(model_type, model_params);
      }
    }

    // write output
    progresslogger.startProgress(0, outs.size(), "applying RT transformations and writing output files");
    for (Size i = 0; i < outs.size(); ++i)
    {
      progresslogger.setProgress(i);

      MapAlignmentTransformer::transformRetentionTimes(peak_maps[i], 
                                                       transformations[i]);
      // annotate output with data processing info
      addDataProcessing_(peak_maps[i], 
                         getProcessingInfo_(DataProcessing::ALIGNMENT));

      f.store(outs[i], peak_maps[i]);
    }
    progresslogger.endProgress();

    if (!trafos.empty())
    {
      TransformationXMLFile trafo_file;
      for (Size i = 0; i < transformations.size(); ++i)
      {
        trafo_file.store(trafos[i], transformations[i]);
      }
    }

    return EXECUTION_OK;
  }
Esempio n. 15
0
  void QTClusterFinder::run_(const vector<MapType> & input_maps,
                             ConsensusMap & result_map)
  {
    num_maps_ = input_maps.size();
    if (num_maps_ < 2)
    {
      throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
                                       "At least two input maps required");
    }

    // set up the distance functor (and set other parameters):
    DoubleReal max_intensity = input_maps[0].getMaxInt();
    DoubleReal max_mz = input_maps[0].getMax()[1];
    for (Size map_index = 1; map_index < num_maps_; ++map_index)
    {
      max_intensity = max(max_intensity, input_maps[map_index].getMaxInt());
      max_mz = max(max_mz, input_maps[map_index].getMax()[0]);
    }

    setParameters_(max_intensity, max_mz);

    // create the hash grid and fill it with features:
    //cout << "Hashing..." << endl;
    list<GridFeature> grid_features;
    Grid grid(Grid::ClusterCenter(max_diff_rt_, max_diff_mz_));
    for (Size map_index = 0; map_index < num_maps_; ++map_index)
    {
      for (Size feature_index = 0; feature_index < input_maps[map_index].size();
           ++feature_index)
      {
        grid_features.push_back(
          GridFeature(input_maps[map_index][feature_index], map_index,
                      feature_index));
        GridFeature & gfeature = grid_features.back();
        // sort peptide hits once now, instead of multiple times later:
        BaseFeature & feature = const_cast<BaseFeature &>(
          grid_features.back().getFeature());
        for (vector<PeptideIdentification>::iterator pep_it =
               feature.getPeptideIdentifications().begin(); pep_it !=
             feature.getPeptideIdentifications().end(); ++pep_it)
        {
          pep_it->sort();
        }
        grid.insert(std::make_pair(Grid::ClusterCenter(gfeature.getRT(), gfeature.getMZ()), &gfeature));
      }
    }

    // compute QT clustering:
    //cout << "Clustering..." << endl;
    list<QTCluster> clustering;
    computeClustering_(grid, clustering);
    // number of clusters == number of data points:
    Size size = clustering.size();

    // Create a temporary map where we store which GridFeatures are next to which Clusters
    OpenMSBoost::unordered_map<GridFeature *, std::vector< QTCluster * > > element_mapping;
    for (list<QTCluster>::iterator it = clustering.begin(); it != clustering.end(); ++it)
    {
      OpenMSBoost::unordered_map<Size, GridFeature *> elements;
      typedef std::multimap<DoubleReal, GridFeature *> InnerNeighborMap;
      typedef OpenMSBoost::unordered_map<Size, InnerNeighborMap > NeighborMap;
      NeighborMap neigh = it->getNeighbors();
      for (NeighborMap::iterator n_it = neigh.begin(); n_it != neigh.end(); ++n_it)
      {
        for (InnerNeighborMap::iterator i_it = n_it->second.begin(); i_it != n_it->second.end(); ++i_it)
        {
          element_mapping[i_it->second].push_back( &(*it) );
        }
      }
    }

    ProgressLogger logger;
    logger.setLogType(ProgressLogger::CMD);
    logger.startProgress(0, size, "linking features");
    Size progress = 0;
    result_map.clear(false);

    while (!clustering.empty())
    {
      // cout << "Clusters: " << clustering.size() << endl;
      ConsensusFeature consensus_feature;
      makeConsensusFeature_(clustering, consensus_feature, element_mapping);
      if (!clustering.empty())
      {
        result_map.push_back(consensus_feature);
      }
      logger.setProgress(progress++);
    }

    logger.endProgress();
  }