C++ (Cpp) FileHandler::loadExperiment Exemples

Exemple #1

0

Afficher le fichier

Fichier : PrecursorMassCorrector.cpp Projet : FabianAicheler/OpenMS

  ExitCodes main_(int, const char**)
  {
    // parsing parameters
    String in(getStringOption_("in"));
    String feature_in(getStringOption_("feature_in"));
    String out(getStringOption_("out"));
    double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance"));

    // reading input
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    PeakMap exp;
    fh.loadExperiment(in, exp, in_type, log_type_, false, false);
    exp.sortSpectra();

    FeatureMap feature_map;
    if (feature_in != "")
    {
      FeatureXMLFile().load(feature_in, feature_map);
    }

    // calculations
    FeatureFinderAlgorithmIsotopeWavelet iso_ff;
    Param ff_param(iso_ff.getParameters());
    ff_param.setValue("max_charge", getIntOption_("max_charge"));
    ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold"));
    iso_ff.setParameters(ff_param);

    FeatureFinder ff;
    ff.setLogType(ProgressLogger::NONE);

    PeakMap exp2 = exp;
    exp2.clear(false);
    for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->size() != 0)
      {
        exp2.addSpectrum(*it);
      }
    }

    exp = exp2;
    exp.updateRanges();

    // TODO check MS2 and MS1 counts
    ProgressLogger progresslogger;
    progresslogger.setLogType(log_type_);
    progresslogger.startProgress(0, exp.size(), "Correcting precursor masses");
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      progresslogger.setProgress(exp.end() - it);
      if (it->getMSLevel() != 2)
      {
        continue;
      }
      // find first MS1 scan of the MS/MS scan
      PeakMap::Iterator ms1_it = it;
      while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1)
      {
        --ms1_it;
      }
      if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1)
      {
        writeLog_("Did not find a MS1 scan to the MS/MS scan at RT=" + String(it->getRT()));
        continue;
      }
      if (ms1_it->size() == 0)
      {
        writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1);
        continue;
      }

      PeakMap::Iterator ms2_it = ms1_it;
      ++ms2_it;

      while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2)
      {
        // first: error checks
        if (ms2_it->getPrecursors().empty())
        {
          writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1);
          ++ms2_it;
          continue;
        }
        else if (ms2_it->getPrecursors().size() > 1)
        {
          writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one.");
        }

        Precursor prec = *ms2_it->getPrecursors().begin();
        double prec_pos = prec.getMZ();

        PeakMap new_exp;
        // now excise small region from the MS1 spec for the feature finder (isotope pattern must be covered...)
        PeakSpectrum zoom_spec;
        for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit)
        {
          if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3)
          {
            zoom_spec.push_back(*pit);
          }
        }
        new_exp.addSpectrum(zoom_spec);
        new_exp.updateRanges();
        FeatureMap features, seeds;
        ff.run("isotope_wavelet", new_exp, features, ff_param, seeds);
        if (features.empty())
        {
          writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1);
          ++ms2_it;
          continue;
        }

        double max_int(numeric_limits<double>::min());
        double min_dist(numeric_limits<double>::max());
        Size max_int_feat_idx(0);

        for (Size i = 0; i != features.size(); ++i)
        {
          if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance &&
              features[i].getIntensity() > max_int)
          {
            max_int_feat_idx = i;
            max_int = features[i].getIntensity();
            min_dist = fabs(features[i].getMZ() - prec_pos);
          }
        }


        writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5);
        if (min_dist < precursor_mass_tolerance)
        {
          prec.setMZ(features[max_int_feat_idx].getMZ());
          prec.setCharge(features[max_int_feat_idx].getCharge());
          vector<Precursor> precs;
          precs.push_back(prec);
          ms2_it->setPrecursors(precs);
          writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1);
        }

        ++ms2_it;
      }
      it = --ms2_it;
    }
    progresslogger.endProgress();

    // writing output
    fh.storeExperiment(out, exp, log_type_);

    return EXECUTION_OK;
  }

Exemple #2

0

Afficher le fichier

Fichier : FileMerger.C Projet : aiche/open-ms-mirror

  ExitCodes main_(int, const char**)
  {

    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------
    //file list
    StringList file_list = getStringList_("in");

    //file type
    FileHandler fh;
    FileTypes::Type force_type;
    if (getStringOption_("in_type").size() > 0)
    {
      force_type = FileTypes::nameToType(getStringOption_("in_type"));
    }
    else
    {
      force_type = fh.getType(file_list[0]);
    }

    //output file names and types
    String out_file = getStringOption_("out");

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    bool annotate_file_origin =  getFlag_("annotate_file_origin");

    if (force_type == FileTypes::FEATUREXML)
    {
      FeatureMap<> out;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        FeatureMap<> map;
        FeatureXMLFile fh;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      FeatureXMLFile f;
      f.store(out_file, out);

    }
    else if (force_type == FileTypes::CONSENSUSXML)
    {
      ConsensusMap out;
      ConsensusXMLFile fh;
      fh.load(file_list[0], out);
      //skip first file
      for (Size i = 1; i < file_list.size(); ++i)
      {
        ConsensusMap map;
        ConsensusXMLFile fh;
        fh.load(file_list[i], map);

        if (annotate_file_origin)
        {
          for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
          {
            it->setMetaValue("file_origin", DataValue(file_list[i]));
          }
        }
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      ConsensusXMLFile f;
      f.store(out_file, out);
    }
    else if (force_type == FileTypes::TRAML)
    {
      TargetedExperiment out;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        TargetedExperiment map;
        TraMLFile fh;
        fh.load(file_list[i], map);
        out += map;
      }

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      Software software;
      software.setName("FileMerger");
      software.setVersion(VersionInfo::getVersion());
      out.addSoftware(software);

      TraMLFile f;
      f.store(out_file, out);
    }
    else
    {
      // we might want to combine different types, thus we only
      // query in_type (which applies to all files)
      // and not the suffix or content of a single file
      force_type = FileTypes::nameToType(getStringOption_("in_type"));

      //rt
      bool rt_auto_number = getFlag_("raw:rt_auto");
      bool rt_filename = getFlag_("raw:rt_filename");
      bool rt_custom = false;
      DoubleList custom_rts = getDoubleList_("raw:rt_custom");
      if (custom_rts.size() != 0)
      {
        rt_custom = true;
        if (custom_rts.size() != file_list.size())
        {
          writeLog_("Custom retention time list must have as many elements as there are input files!");
          printUsage_();
          return ILLEGAL_PARAMETERS;
        }
      }

      //ms level
      bool user_ms_level = getFlag_("raw:user_ms_level");

      MSExperiment<> out;
      out.reserve(file_list.size());
      UInt rt_auto = 0;
      UInt native_id = 0;
      std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms;
      for (Size i = 0; i < file_list.size(); ++i)
      {
        String filename = file_list[i];

        //load file
        MSExperiment<> in;
        fh.loadExperiment(filename, in, force_type, log_type_);
        if (in.empty() && in.getChromatograms().empty())
        {
          writeLog_(String("Warning: Empty file '") + filename + "'!");
          continue;
        }
        out.reserve(out.size() + in.size());

        //warn if custom RT and more than one scan in input file
        if (rt_custom && in.size() > 1)
        {
          writeLog_(String("Warning: More than one scan in file '") + filename + "'! All scans will have the same retention time!");
        }

        for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2)
        {
          //handle rt
          Real rt_final = it2->getRT();
          if (rt_auto_number)
          {
            rt_final = ++rt_auto;
          }
          else if (rt_custom)
          {
            rt_final = custom_rts[i];
          }
          else if (rt_filename)
          {
            if (!filename.hasSubstring("rt"))
            {
              writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'"));
            }
            for (Size i = 0; i < filename.size(); ++i)
            {
              if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i]))
              {
                String rt;
                while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i])))
                {
                  rt += filename[i++];
                }
                if (rt.size() > 0)
                {
                  // remove dot from rt3892.98.dta
                  //                          ^
                  if (rt[rt.size() - 1] == '.')
                  {
                    // remove last character
                    rt.erase(rt.end() - 1);
                  }
                }
                try
                {
                  float tmp = rt.toFloat();
                  rt_final = tmp;
                }
                catch (Exception::ConversionError)
                {
                  writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'."));
                }
              }
            }
          }

          // none of the rt methods were successful
          if (rt_final == -1)
          {
            writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'");
          }

          out.addSpectrum(*it2);
          out.getSpectra().back().setRT(rt_final);
          out.getSpectra().back().setNativeID(native_id);

          if (user_ms_level)
          {
            out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level"));
          }
          ++native_id;
        }

        // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles)
        if (in.size() == 1)
        {
          out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]);
          in.getSourceFiles().clear();   // delete source file annotated from source file (its in the spectrum anyways)
        }
        // copy experimental settings from first file
        if (i == 0)
        {
          out.ExperimentalSettings::operator=(in);
        }
        else // otherwise append
        {
          out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then
        }

        // also add the chromatograms
        for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2)
        {
          all_chromatograms.push_back(*it2);
        }

      }
      // set the chromatograms
      out.setChromatograms(all_chromatograms);

      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------

      //annotate output with data processing info
      addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

      MzMLFile f;
      f.setLogType(log_type_);
      f.store(out_file, out);

    }

    return EXECUTION_OK;
  }

Exemple #3

0

Afficher le fichier

Fichier : FileHandler_test.cpp Projet : burlab/OpenMS

FileHandler tmp;
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("header_file.h")), FileTypes::UNKNOWN)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("class_test_infile.txt")), FileTypes::TXT)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("IdXMLFile_whole.idXML")), FileTypes::IDXML)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("ConsensusXMLFile.consensusXML")), FileTypes::CONSENSUSXML)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("TransformationXMLFile_1.trafoXML")), FileTypes::TRANSFORMATIONXML)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("FileHandler_toppas.toppas")), FileTypes::TOPPAS)
TEST_EQUAL(tmp.getType(OPENMS_GET_TEST_DATA_PATH("pepnovo.txt")), FileTypes::TXT)

TEST_EXCEPTION(Exception::FileNotFound, tmp.getType("/bli/bla/bluff"))
END_SECTION

START_SECTION((template < class PeakType > bool loadExperiment(const String &filename, MSExperiment< PeakType > &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool compute_hash=true)))
FileHandler tmp;
PeakMap exp;
TEST_EQUAL(tmp.loadExperiment("test.bla", exp), false)
TEST_EQUAL(tmp.loadExperiment(OPENMS_GET_TEST_DATA_PATH("DTAFile_test.dta"), exp), true)

TEST_EQUAL(tmp.loadExperiment(OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"), exp), true)
TEST_REAL_SIMILAR(exp[1][0].getPosition()[0], 110)
TEST_REAL_SIMILAR(exp[1][1].getPosition()[0], 120)
TEST_REAL_SIMILAR(exp[1][2].getPosition()[0], 130)

// starts with 110, so this one should skip the first
tmp.getOptions().setMZRange(DRange<1>(115, 1000));
TEST_EQUAL(tmp.loadExperiment(OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"), exp), true)
TEST_REAL_SIMILAR(exp[1][0].getPosition()[0], 120)
TEST_REAL_SIMILAR(exp[1][1].getPosition()[0], 130)

tmp.getOptions() = PeakFileOptions();
TEST_EQUAL(tmp.loadExperiment(OPENMS_GET_TEST_DATA_PATH("MzXMLFile_1.mzXML"), exp), true)

Exemple #4

0

Afficher le fichier

Fichier : MascotAdapterOnline.cpp Projet : BioinformaticsArchive/OpenMS

  ExitCodes main_(int argc, const char** argv)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input/output files
    String in(getStringOption_("in")), out(getStringOption_("out"));
    FileHandler fh;
    FileTypes::Type in_type = fh.getType(in);

    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------

    PeakMap exp;
    // keep only MS2 spectra
    fh.getOptions().addMSLevel(2);
    fh.loadExperiment(in, exp, in_type, log_type_);
    writeDebug_(String("Spectra loaded: ") + exp.size(), 2);

    if (exp.getSpectra().empty())
    {
      throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file.");
    }

    // determine type of spectral data (profile or centroided)
    SpectrumSettings::SpectrumType spectrum_type = exp[0].getType();

    if (spectrum_type == SpectrumSettings::RAWDATA)
    {
      if (!getFlag_("force"))
      {
        throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag.");
      }
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    Param mascot_param = getParam_().copy("Mascot_parameters:", true);
    MascotGenericFile mgf_file;
    Param p;
    // TODO: switch this to mzML (much smaller)
    p.setValue("internal:format", "Mascot generic", "Sets the format type of the peak list, this should not be changed unless you write the header only.", ListUtils::create<String>("advanced"));
    p.setValue("internal:HTTP_format", "true", "Write header with MIME boundaries instead of simple key-value pairs. For HTTP submission only.", ListUtils::create<String>("advanced"));
    p.setValue("internal:content", "all", "Use parameter header + the peak lists with BEGIN IONS... or only one of them.", ListUtils::create<String>("advanced"));
    mgf_file.setParameters(mascot_param);

    // get the spectra into string stream
    writeDebug_("Writing MGF file to stream", 1);
    stringstream ss;
    mgf_file.store(ss, in, exp, true); // write in compact format

    // Usage of a QCoreApplication is overkill here (and ugly too), but we just use the
    // QEventLoop to process the signals and slots and grab the results afterwards from
    // the MascotRemotQuery instance
    char** argv2 = const_cast<char**>(argv);
    QCoreApplication event_loop(argc, argv2);
    MascotRemoteQuery* mascot_query = new MascotRemoteQuery(&event_loop);
    Param mascot_query_param = getParam_().copy("Mascot_server:", true);
    writeDebug_("Setting parameters for Mascot query", 1);
    mascot_query->setParameters(mascot_query_param);
    writeDebug_("Setting spectra for Mascot query", 1);
    mascot_query->setQuerySpectra(ss.str());

    // remove unnecessary spectra
    ss.clear();

    QObject::connect(mascot_query, SIGNAL(done()), &event_loop, SLOT(quit()));
    QTimer::singleShot(1000, mascot_query, SLOT(run()));
    writeDebug_("Fire off Mascot query", 1);
    event_loop.exec();
    writeDebug_("Mascot query finished", 1);

    if (mascot_query->hasError())
    {
      writeLog_("An error occurred during the query: " + mascot_query->getErrorMessage());
      delete mascot_query;
      return EXTERNAL_PROGRAM_ERROR;
    }

    // write Mascot response to file
    String mascot_tmp_file_name(File::getTempDirectory() + "/" + File::getUniqueName() + "_Mascot_response");
    QFile mascot_tmp_file(mascot_tmp_file_name.c_str());
    mascot_tmp_file.open(QIODevice::WriteOnly);
    mascot_tmp_file.write(mascot_query->getMascotXMLResponse());
    mascot_tmp_file.close();

    // clean up
    delete mascot_query;

    vector<PeptideIdentification> pep_ids;
    ProteinIdentification prot_id;

    // set up mapping between scan numbers and retention times:
    MascotXMLFile::RTMapping rt_mapping;
    MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);

    // read the response
    MascotXMLFile().load(mascot_tmp_file_name, prot_id, pep_ids, rt_mapping);
    writeDebug_("Read " + String(pep_ids.size()) + " peptide ids and " + String(prot_id.getHits().size()) + " protein identifications from Mascot", 5);

    // for debugging errors relating to unexpected response files
    if (this->debug_level_ >= 100)
    {
      writeDebug_(String("\nMascot Server Response file saved to: '") + mascot_tmp_file_name + "'. If an error occurs, send this file to the OpenMS team.\n", 100);
    }
    else
    {
      // delete file
      mascot_tmp_file.remove();
    }

    // keep or delete protein identifications?!
    vector<ProteinIdentification> prot_ids;
    if (!getFlag_("keep_protein_links"))
    {
      // remove protein links from peptides
      for (Size i = 0; i < pep_ids.size(); ++i)
      {
        std::vector<PeptideHit> hits = pep_ids[i].getHits();
        for (Size h = 0; h < hits.size(); ++h)
        {
          hits[h].setPeptideEvidences(vector<PeptideEvidence>());
        }
        pep_ids[i].setHits(hits);
      }
      // remove proteins
      std::vector<ProteinHit> p_hit;
      prot_id.setHits(p_hit);
    }
    prot_ids.push_back(prot_id);

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    IdXMLFile().store(out, prot_ids, pep_ids);

    return EXECUTION_OK;
  }

Exemple #5

0

Afficher le fichier

Fichier : IDFileConverter.cpp Projet : BioinformaticsArchive/OpenMS

  ExitCodes
  main_(int, const char**)
  {
    //-------------------------------------------------------------
    // general variables and data
    //-------------------------------------------------------------
    FileHandler fh;
    vector<PeptideIdentification> peptide_identifications;
    vector<ProteinIdentification> protein_identifications;

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    const String in = getStringOption_("in");

    ProgressLogger logger;
    logger.setLogType(ProgressLogger::CMD);
    logger.startProgress(0, 1, "Loading...");

    if (File::isDirectory(in))
    {
      const String in_directory = File::absolutePath(in).ensureLastChar('/');
      const String mz_file = getStringOption_("mz_file");
      const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide");

      UInt i = 0;
      FileHandler fh;
      FileTypes::Type type;
      MSExperiment<Peak1D> msexperiment;
      // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;)  However, now String::toInt() might throw.
      map<Int, float> num_and_rt;
      vector<String> NativeID;

      // The mz-File (if given)
      if (!mz_file.empty())
      {
        type = fh.getTypeByFileName(mz_file);
        fh.loadExperiment(mz_file, msexperiment, type);

        for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it)
        {
          String(spectra_it->getNativeID()).split('=', NativeID);
          try
          {
            num_and_rt[NativeID[1].toInt()] = spectra_it->getRT();
            // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01
          }
          catch (Exception::ConversionError& e)
          {
            writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
          }
        }
      }

      // Get list of the actual Sequest .out-Files
      StringList in_files;
      if (!File::fileList(in_directory, String("*.out"), in_files))
      {
        writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!");
      }

      // Now get to work ...
      for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it)
      {
        vector<PeptideIdentification> peptide_ids_seq;
        ProteinIdentification protein_id_seq;
        vector<double> pvalues_seq;
        vector<String> in_file_vec;

        SequestOutfile sequest_outfile;

        writeDebug_(String("Reading file ") + *in_files_it, 3);

        try
        {
          sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide);

          in_files_it->split('.', in_file_vec);

          for (Size j = 0; j < peptide_ids_seq.size(); ++j)
          {

            // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files.
            peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i);

            Int scan_number = 0;
            if (!mz_file.empty())
            {
              try
              {
                scan_number = in_file_vec[2].toInt();
                peptide_ids_seq[j].setRT(num_and_rt[scan_number]);
              }
              catch (Exception::ConversionError& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage());
              }
              catch (exception& e)
              {
                writeLog_(String("Error: Cannot read scan number as integer. '") + e.what());
              }
              //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? semantics of mz
              const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge();
              peptide_ids_seq[j].setMZ(real_mz);
            }

            writeDebug_(String("scan: ") + String(scan_number) + String("  RT: ") + String(peptide_ids_seq[j].getRT()) + "  MZ: " + String(peptide_ids_seq[j].getMZ()) + "  Ident: " + peptide_ids_seq[j].getIdentifier(), 4);

            peptide_identifications.push_back(peptide_ids_seq[j]);
          }

          protein_id_seq.setIdentifier(*in_files_it + "_" + i);
          protein_identifications.push_back(protein_id_seq);
          ++i;
        }
        catch (Exception::ParseError& pe)
        {
          writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")");
          throw;
        }
        catch (...)
        {
          writeLog_(String("Error reading file: ") + *in_files_it);
          throw;
        }
      }

      writeDebug_("All files processed.", 3);
    } // ! directory
    else
    {
      FileTypes::Type in_type = fh.getType(in);

      if (in_type == FileTypes::PEPXML)
      {
        String exp_name = getStringOption_("mz_file");
        String orig_name =  getStringOption_("mz_name");
        bool use_precursor_data = getFlag_("use_precursor_data");

        if (exp_name.empty())
        {
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, orig_name);
        }
        else
        {
          MSExperiment<> exp;
          fh.loadExperiment(exp_name, exp);
          if (!orig_name.empty())
          {
            exp_name = orig_name;
          }
          PepXMLFile().load(in, protein_identifications,
                            peptide_identifications, exp_name, exp,
                            use_precursor_data);
        }
      }
      else if (in_type == FileTypes::IDXML)
      {
        IdXMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::MZIDENTML)
      {
        LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl;
        MzIdentMLFile().load(in, protein_identifications, peptide_identifications);
      }
      else if (in_type == FileTypes::PROTXML)
      {
        protein_identifications.resize(1);
        peptide_identifications.resize(1);
        ProtXMLFile().load(in, protein_identifications[0],
                           peptide_identifications[0]);
      }
      else if (in_type == FileTypes::OMSSAXML)
      {
        protein_identifications.resize(1);
        OMSSAXMLFile().load(in, protein_identifications[0],
                            peptide_identifications, true);
      }
      else if (in_type == FileTypes::MASCOTXML)
      {
        String scan_regex = getStringOption_("scan_regex");
        String exp_name = getStringOption_("mz_file");
        MascotXMLFile::RTMapping rt_mapping;
        if (!exp_name.empty())
        {
          PeakMap exp;
          // load only MS2 spectra:
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping);
        }
        protein_identifications.resize(1);
        MascotXMLFile().load(in, protein_identifications[0],
                             peptide_identifications, rt_mapping, scan_regex);
      }
      else if (in_type == FileTypes::XML)
      {
        ProteinIdentification protein_id;
        XTandemXMLFile().load(in, protein_id, peptide_identifications);
        protein_id.setSearchEngineVersion("");
        protein_id.setSearchEngine("XTandem");
        protein_identifications.push_back(protein_id);
        String exp_name = getStringOption_("mz_file");
        if (!exp_name.empty())
        {
          PeakMap exp;
          fh.getOptions().addMSLevel(2);
          fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_);
          for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it)
          {
            UInt id = (Int)it->getMetaValue("spectrum_id");
            --id; // native IDs were written 1-based
            if (id < exp.size())
            {
              it->setRT(exp[id].getRT());
              double pre_mz(0.0);
              if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
              it->setMZ(pre_mz);
              it->removeMetaValue("spectrum_id");
            }
            else
            {
              LOG_ERROR << "XTandem xml: Error: id '" << id << "' not found in peak map!" << endl;
            }
          }
        }
      }
      else
      {
        writeLog_("Unknown input file type given. Aborting!");
        printUsage_();
        return ILLEGAL_PARAMETERS;
      }
    }
    logger.endProgress();

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------
    const String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));
    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }
    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    logger.startProgress(0, 1, "Storing...");
    if (out_type == FileTypes::PEPXML)
    {
      bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed");
      String mz_file = getStringOption_("mz_file");
      String mz_name = getStringOption_("mz_name");
      PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed);
    }
    else if (out_type == FileTypes::IDXML)
    {
      IdXMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::MZIDENTML)
    {
      MzIdentMLFile().store(out, protein_identifications, peptide_identifications);
    }
    else if (out_type == FileTypes::FASTA)
    {
      Size count = 0;
      ofstream fasta(out.c_str(), ios::out);
      for (Size i = 0; i < peptide_identifications.size(); ++i)
      {
        for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l)
        {
          const PeptideHit& hit = peptide_identifications[i].getHits()[l];
          fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++
                << "|" << hit.getSequence().toString() << endl;
          String seq = hit.getSequence().toUnmodifiedString();
          // FASTA files should have at most 60 characters of sequence info per line
          for (Size j = 0; j < seq.size(); j += 60)
          {
            Size k = min(j + 60, seq.size());
            fasta << string(seq[j], seq[k]) << endl;
          }
        }
      }
    }
    else
    {
      writeLog_("Unsupported output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }
    logger.endProgress();


    return EXECUTION_OK;
  }

Exemple #6

0

Afficher le fichier

Fichier : FileConverter.C Projet : BioITer/OpenMS

  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input file names
    String in = getStringOption_("in");

    //input file type
    FileHandler fh;
    FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

    if (in_type == FileTypes::UNKNOWN)
    {
      in_type = fh.getType(in);
      writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
    }

    if (in_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine input file type!");
      return PARSE_ERROR;
    }


    //output file names and types
    String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }

    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    bool TIC_DTA2D = getFlag_("TIC_DTA2D");

    writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 1);

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    typedef MSExperiment<Peak1D> MSExperimentType;
    MSExperimentType exp;

    typedef MSExperimentType::SpectrumType SpectrumType;

    typedef FeatureMap<> FeatureMapType;

    FeatureMapType fm;
    ConsensusMap cm;

    writeDebug_(String("Loading input file"), 1);

    if (in_type == FileTypes::CONSENSUSXML)
    {
      ConsensusXMLFile().load(in, cm);
      cm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You you will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::EDTA)
    {
      EDTAFile().load(in, cm);
      cm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You you will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::FEATUREXML ||
             in_type == FileTypes::TSV ||
             in_type == FileTypes::PEPLIST ||
             in_type == FileTypes::KROENIK)
    {
      fh.loadFeatures(in, fm, in_type);
      fm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting features to peaks. You will lose information! Mass traces are added, if present as 'num_of_masstraces' and 'masstrace_intensity_<X>' (X>=0) meta values.");
        exp.set2DData<true>(fm);
      }
    }
    else
    {
      fh.loadExperiment(in, exp, in_type, log_type_);
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    writeDebug_(String("Writing output file"), 1);

    if (out_type == FileTypes::MZML)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZML));
      MzMLFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertSpectraToChromatograms(exp, true);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZDATA)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZDATA));
      MzDataFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZXML)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZXML));
      MzXMLFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::DTA2D)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      DTA2DFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      if (TIC_DTA2D)
      {
        // store the total ion chromatogram (TIC)
        f.storeTIC(out, exp);
      }
      else
      {
        // store entire experiment
        f.store(out, exp);
      }


    }
    else if (out_type == FileTypes::MGF)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      MascotGenericFile f;
      f.setLogType(log_type_);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::FEATUREXML)
    {
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
      }
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
        ConsensusMap::convert(cm, true, fm);
      }
      else // not loaded as feature map or consensus map
      {
        // The feature specific information is only defaulted. Enough reasons to issue a warning!
        writeLog_("Warning: Converting peaks to features will lead to incomplete features!");
        fm.clear();
        fm.reserve(exp.getSize());
        typedef FeatureMapType::FeatureType FeatureType;
        FeatureType feature;
        feature.setQuality(0, 1); // override default
        feature.setQuality(1, 1); // override default
        feature.setOverallQuality(1); // override default
        for (MSExperimentType::ConstIterator spec_iter = exp.begin();
             spec_iter != exp.end();
             ++spec_iter
             )
        {
          feature.setRT(spec_iter->getRT());
          for (SpectrumType::ConstIterator peak1_iter = spec_iter->begin();
               peak1_iter != spec_iter->end();
               ++peak1_iter
               )
          {
            feature.setMZ(peak1_iter->getMZ());
            feature.setIntensity(peak1_iter->getIntensity());
            feature.setUniqueId();
            fm.push_back(feature);
          }
        }
        fm.updateRanges();
      }

      addDataProcessing_(fm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      FeatureXMLFile().store(out, fm);
    }
    else if (out_type == FileTypes::CONSENSUSXML)
    {
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
        ConsensusMap::convert(0, fm, cm);
      }
      // nothing to do for consensus input
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
      }
      else // experimental data
      {
        ConsensusMap::convert(0, exp, cm, exp.size());
      }

      addDataProcessing_(cm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      ConsensusXMLFile().store(out, cm);
    }
    else if (out_type == FileTypes::EDTA)
    {
      if (fm.size() > 0 && cm.size() > 0)
      {
        LOG_ERROR << "Internal error: cannot decide on container (Consensus or Feature)! This is a bug. Please report it!";
        return INTERNAL_ERROR;
      }
      if (fm.size() > 0) EDTAFile().store(out, fm);
      else if (cm.size() > 0) EDTAFile().store(out, cm);
    }
    else
    {
      writeLog_("Unknown output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    return EXECUTION_OK;
  }

Exemple #7

0

Afficher le fichier

Fichier : MyriMatchAdapter.cpp Projet : BioinformaticsArchive/OpenMS

  ExitCodes main_(int, const char**)
  {
    String tmp_dir = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files
    {
      QDir d;
      d.mkpath(tmp_dir.toQString());
    }
    String logfile(getStringOption_("log"));
    String myrimatch_executable(getStringOption_("myrimatch_executable"));

    //-------------------------------------------------------------
    // get version of MyriMatch
    //-------------------------------------------------------------

    QProcess qp;
    String myrimatch_version;
    MyriMatchVersion myrimatch_version_i;

    // we invoke myrimatch w/o arguments. that yields a return code != 0. but
    // there is no other way for version 2.1 to get the version number
    qp.start(myrimatch_executable.toQString(), QStringList(), QIODevice::ReadOnly); // does automatic escaping etc...
    qp.waitForFinished();
    String output(QString(qp.readAllStandardOutput()));

    vector<String> lines;
    vector<String> version_split;
    output.split('\n', lines);

    // the version number is expected to be in the second line
    if (lines.size() < 2)
    {
      writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!");
      return EXTERNAL_PROGRAM_ERROR;
    }

    // the version is expected to be something like:
    // MyriMatch 2.1.111 (2011-12-27)
    lines[1].split(' ', version_split);
    if (version_split.size() == 3 && getVersion_(version_split[1], myrimatch_version_i))
    {
      myrimatch_version = version_split[1].removeWhitespaces();
      writeDebug_("Setting MyriMatch version to " + myrimatch_version, 1);
    }
    else
    {
      writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!");
      return EXTERNAL_PROGRAM_ERROR;
    }
    if (myrimatch_version_i.myrimatch_major != 2 && myrimatch_version_i.myrimatch_minor != 1)
    {
      writeDebug_("Warning: unsupported MyriMatch version (" + myrimatch_version + "). Tested only for MyriMatch 2.1.x", 0);
    }

    //-------------------------------------------------------------
    // Validate user parameters
    //-------------------------------------------------------------
    if (getIntOption_("min_precursor_charge") > getIntOption_("max_precursor_charge"))
    {
      LOG_ERROR << "Given charge range is invalid: max_precursor_charge needs to be >= min_precursor_charge." << std::endl;
      return ILLEGAL_PARAMETERS;
    }


    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------

    String inputfile_name = File::absolutePath(getStringOption_("in"));
    String outputfile_name = getStringOption_("out");
    String db_name = File::absolutePath(String(getStringOption_("database")));

    // building parameter String
    StringList parameters;

    // Common Identification engine options
    StringList static_mod_list;
    StringList dynamic_mod_list;
    translateModifications(static_mod_list, dynamic_mod_list);
    if (!static_mod_list.empty())
      parameters << "-StaticMods" << ListUtils::concatenate(static_mod_list, " ");
    if (!dynamic_mod_list.empty())
      parameters << "-DynamicMods" << ListUtils::concatenate(dynamic_mod_list, " ");

    parameters << "-ProteinDatabase"  << File::absolutePath(db_name);

    if (getFlag_("precursor_mass_tolerance_avg"))
    {
      parameters << "-AvgPrecursorMzTolerance";
    }
    else
    {
      parameters << "-MonoPrecursorMzTolerance";
    }
    String precursor_mass_tolerance_unit = getStringOption_("precursor_mass_tolerance_unit") == "Da" ? " m/z" : " ppm";
    parameters << String(getDoubleOption_("precursor_mass_tolerance")) + precursor_mass_tolerance_unit;

    String fragment_mass_tolerance_unit = getStringOption_("fragment_mass_tolerance_unit");
    if (fragment_mass_tolerance_unit == "Da")
    {
      fragment_mass_tolerance_unit = "m/z";
    }

    parameters << "-FragmentMzTolerance" << String(getDoubleOption_("fragment_mass_tolerance")) + " " + fragment_mass_tolerance_unit;
    int min_charge = getIntOption_("min_precursor_charge");
    int max_charge = getIntOption_("max_precursor_charge");
    parameters << "-SpectrumListFilters" << "chargeStatePredictor false " +  String(max_charge) + " " +  String(min_charge) + " 0.9";
    //parameters << "-ThreadCountMultiplier" << String(getIntOption_("threads")); // MyriMatch does not recognise this, even though it's in the manual.

    // MyriMatch specific parameters
    parameters << "-NumChargeStates" << getIntOption_("NumChargeStates");
    parameters << "-TicCutoffPercentage" << String(getDoubleOption_("TicCutoffPercentage"));
    parameters << "-MaxDynamicMods" << getIntOption_("MaxDynamicMods");
    parameters << "-MaxResultRank" << getIntOption_("MaxResultRank");
    parameters << "-MinTerminiCleavages" << getIntOption_("MinTerminiCleavages");
    parameters << "-MaxMissedCleavages" << getIntOption_("MaxMissedCleavages");
    String cleavage_rule = getStringOption_("CleavageRules");
    if (cleavage_rule.empty())
    {
      cleavage_rule = "Trypsin/P";
    }
    parameters << "-CleavageRules" << cleavage_rule;

    // advanced parameters
    parameters << "-MinPeptideMass"   << getDoubleOption_("MinPeptideMass");
    parameters << "-MaxPeptideMass"   << getDoubleOption_("MaxPeptideMass");
    parameters << "-MinPeptideLength" << getIntOption_("MinPeptideLength");
    parameters << "-MaxPeptideLength" << getIntOption_("MaxPeptideLength");
    parameters << "-NumIntensityClasses" << getIntOption_("NumIntensityClasses");
    parameters << "-ClassSizeMultiplier" << getDoubleOption_("ClassSizeMultiplier");
    parameters << "-MonoisotopeAdjustmentSet" << getStringOption_("MonoisotopeAdjustmentSet");
    parameters << "-cpus" << getIntOption_("threads");


    // Constant parameters

    // DecoyPrefix worked only when set through the config file
    String cfg_file = tmp_dir + "myrimatch.cfg";
    ofstream f(cfg_file.c_str());
    f << "DecoyPrefix=\"\"\n";
    f.close();
    parameters << "-cfg" << cfg_file;

    // path to input file must be the last parameter
    parameters << inputfile_name;

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    QStringList qparam;
    writeDebug_("MyriMatch arguments:", 1);
    writeDebug_(String("\"") + ListUtils::concatenate(parameters, "\" \"") + "\"", 1);
    for (Size i = 0; i < parameters.size(); ++i)
    {
      qparam << parameters[i].toQString();
    }
    QProcess process;

    // Bad style, because it breaks relative paths?
    process.setWorkingDirectory(tmp_dir.toQString());

    process.start(myrimatch_executable.toQString(), qparam, QIODevice::ReadOnly);
    bool success = process.waitForFinished(-1);
    String myri_msg(QString(process.readAllStandardOutput()));
    String myri_err(QString(process.readAllStandardError()));
    writeDebug_(myri_msg, 1);
    writeDebug_(myri_err, 0);
    if (!success || process.exitStatus() != 0 || process.exitCode() != 0)
    {
      writeLog_("Error: MyriMatch problem! (Details can be seen in the logfile: \"" + logfile + "\")");
      writeLog_("Note: This message can also be triggered if you run out of space in your tmp directory");
      return EXTERNAL_PROGRAM_ERROR;
    }

    //-------------------------------------------------------------
    // reading MyriMatch output
    //-------------------------------------------------------------

    writeDebug_("Reading output of MyriMatch", 5);
    String exp_name = File::basename(inputfile_name);
    String pep_file = tmp_dir + File::removeExtension(exp_name) + ".pepXML";

    FileHandler fh;
    MSExperiment<> exp;
    fh.loadExperiment(inputfile_name, exp);

    vector<ProteinIdentification> protein_identifications;
    vector<PeptideIdentification> peptide_identifications;
    if (File::exists(pep_file))
    {
      const bool use_precursor_data = false;
      PepXMLFile().load(pep_file, protein_identifications, peptide_identifications,
                        exp_name, exp, use_precursor_data);
    }
    else
    {
      writeLog_("Error: MyriMatch problem! No pepXML output file (expected as '" + pep_file + "') was generated by MyriMatch.");
      writeLog_("Note: This message can be triggered if no MS2 spectra were found or no identifications were made.");
      writeLog_("      Myrimatch expects MS2 spectra in mzML files to contain the MSn tag. MSSpectrum with MS level 2 is not sufficient. You can use FileConverter to create such an mzML file by converting from mzML --> mzXML --> mzML.");
      return EXTERNAL_PROGRAM_ERROR;
    }

    if (debug_level_ == 0)
    {
      QFile(pep_file.toQString()).remove();
      QFile(cfg_file.toQString()).remove();
    }
    else
    {
      writeDebug_(String("Not removing '") + pep_file + "' for debugging purposes. Please delete manually!", 1);
      writeDebug_(String("Not removing '") + cfg_file + "' for debugging purposes. Please delete manually!", 1);
    }
    //-------------------------------------------------------------
    // writing results
    //-------------------------------------------------------------

    IdXMLFile().store(outputfile_name, protein_identifications, peptide_identifications);
    return EXECUTION_OK;
  }

Exemple #8

0

Afficher le fichier

Fichier : FileConverter.cpp Projet : BioinformaticsArchive/OpenMS

  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    //input file names
    String in = getStringOption_("in");
    bool write_mzML_index = getFlag_("write_mzML_index");

    //input file type
    FileHandler fh;
    FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

    if (in_type == FileTypes::UNKNOWN)
    {
      in_type = fh.getType(in);
      writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
    }

    if (in_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine input file type!");
      return PARSE_ERROR;
    }


    //output file names and types
    String out = getStringOption_("out");
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(out);
    }

    if (out_type == FileTypes::UNKNOWN)
    {
      writeLog_("Error: Could not determine output file type!");
      return PARSE_ERROR;
    }

    bool TIC_DTA2D = getFlag_("TIC_DTA2D");
    bool process_lowmemory = getFlag_("process_lowmemory");

    writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 1);

    String uid_postprocessing = getStringOption_("UID_postprocessing");
    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    typedef MSExperiment<Peak1D> MSExperimentType;
    MSExperimentType exp;

    typedef MSExperimentType::SpectrumType SpectrumType;

    typedef FeatureMap FeatureMapType;

    FeatureMapType fm;
    ConsensusMap cm;

    writeDebug_(String("Loading input file"), 1);

    if (in_type == FileTypes::CONSENSUSXML)
    {
      ConsensusXMLFile().load(in, cm);
      cm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You you will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::EDTA)
    {
      EDTAFile().load(in, cm);
      cm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You you will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting consensus features to peaks. You will lose information!");
        exp.set2DData(cm);
      }
    }
    else if (in_type == FileTypes::FEATUREXML ||
             in_type == FileTypes::TSV ||
             in_type == FileTypes::PEPLIST ||
             in_type == FileTypes::KROENIK)
    {
      fh.loadFeatures(in, fm, in_type);
      fm.sortByPosition();
      if ((out_type != FileTypes::FEATUREXML) &&
          (out_type != FileTypes::CONSENSUSXML))
      {
        // You will lose information and waste memory. Enough reasons to issue a warning!
        writeLog_("Warning: Converting features to peaks. You will lose information! Mass traces are added, if present as 'num_of_masstraces' and 'masstrace_intensity_<X>' (X>=0) meta values.");
        exp.set2DData<true>(fm);
      }
    }
    else if (process_lowmemory)
    {
      // Special switch for the low memory options:
      // We can transform the complete experiment directly without first
      // loading the complete data into memory. PlainMSDataWritingConsumer will
      // write out mzML to disk as they are read from the input.
      if (in_type == FileTypes::MZML && out_type == FileTypes::MZML)
      {
        PlainMSDataWritingConsumer consumer(out);
        consumer.getOptions().setWriteIndex(write_mzML_index);
        consumer.addDataProcessing(getProcessingInfo_(DataProcessing::CONVERSION_MZML));
        MzMLFile mzmlfile; 
        mzmlfile.setLogType(log_type_);
        mzmlfile.transform(in, &consumer);
        return EXECUTION_OK;
      }
      else if (in_type == FileTypes::MZXML && out_type == FileTypes::MZML)
      {
        PlainMSDataWritingConsumer consumer(out);
        consumer.getOptions().setWriteIndex(write_mzML_index);
        consumer.addDataProcessing(getProcessingInfo_(DataProcessing::CONVERSION_MZML));
        MzXMLFile mzxmlfile; 
        mzxmlfile.setLogType(log_type_);
        mzxmlfile.transform(in, &consumer);
        return EXECUTION_OK;
      }
      else
      {
        throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__,
          "Process_lowmemory option can only be used with mzML / mzXML input and mzML output data types.");
      }
    }
    else
    {
      fh.loadExperiment(in, exp, in_type, log_type_);
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    writeDebug_(String("Writing output file"), 1);

    if (out_type == FileTypes::MZML)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZML));
      MzMLFile f;
      f.setLogType(log_type_);
      f.getOptions().setWriteIndex(write_mzML_index);
      ChromatogramTools().convertSpectraToChromatograms(exp, true);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZDATA)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZDATA));
      MzDataFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::MZXML)
    {
      //annotate output with data processing info
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 CONVERSION_MZXML));
      MzXMLFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      f.store(out, exp);
    }
    else if (out_type == FileTypes::DTA2D)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      DTA2DFile f;
      f.setLogType(log_type_);
      ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp);
      if (TIC_DTA2D)
      {
        // store the total ion chromatogram (TIC)
        f.storeTIC(out, exp);
      }
      else
      {
        // store entire experiment
        f.store(out, exp);
      }


    }
    else if (out_type == FileTypes::MGF)
    {
      //add data processing entry
      addDataProcessing_(exp, getProcessingInfo_(DataProcessing::
                                                 FORMAT_CONVERSION));
      MascotGenericFile f;
      f.setLogType(log_type_);
      f.store(out, exp, getFlag_("MGF_compact"));
    }
    else if (out_type == FileTypes::FEATUREXML)
    {
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        if (uid_postprocessing == "ensure")
        {
          fm.applyMemberFunction(&UniqueIdInterface::ensureUniqueId);
        } else if (uid_postprocessing == "reassign")
        {
          fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
        }
      }
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
        MapConversion::convert(cm, true, fm);
      }
      else // not loaded as feature map or consensus map
      {
        // The feature specific information is only defaulted. Enough reasons to issue a warning!
        writeLog_("Warning: Converting peaks to features will lead to incomplete features!");
        fm.clear();
        fm.reserve(exp.getSize());
        typedef FeatureMapType::FeatureType FeatureType;
        FeatureType feature;
        feature.setQuality(0, 1); // override default
        feature.setQuality(1, 1); // override default
        feature.setOverallQuality(1); // override default
        for (MSExperimentType::ConstIterator spec_iter = exp.begin();
             spec_iter != exp.end();
             ++spec_iter
             )
        {
          feature.setRT(spec_iter->getRT());
          for (SpectrumType::ConstIterator peak1_iter = spec_iter->begin();
               peak1_iter != spec_iter->end();
               ++peak1_iter
               )
          {
            feature.setMZ(peak1_iter->getMZ());
            feature.setIntensity(peak1_iter->getIntensity());
            feature.setUniqueId();
            fm.push_back(feature);
          }
        }
        fm.updateRanges();
      }

      addDataProcessing_(fm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      FeatureXMLFile().store(out, fm);
    }
    else if (out_type == FileTypes::CONSENSUSXML)
    {
      if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) ||
          (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK))
      {
        if (uid_postprocessing == "ensure")
        {
          fm.applyMemberFunction(&UniqueIdInterface::ensureUniqueId);
        } else if (uid_postprocessing == "reassign")
        {
          fm.applyMemberFunction(&UniqueIdInterface::setUniqueId);
        }
        MapConversion::convert(0, fm, cm);
      }
      // nothing to do for consensus input
      else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA)
      {
      }
      else // experimental data
      {
        MapConversion::convert(0, exp, cm, exp.size());
      }

      addDataProcessing_(cm, getProcessingInfo_(DataProcessing::
                                                FORMAT_CONVERSION));
      ConsensusXMLFile().store(out, cm);
    }
    else if (out_type == FileTypes::EDTA)
    {
      if (fm.size() > 0 && cm.size() > 0)
      {
        LOG_ERROR << "Internal error: cannot decide on container (Consensus or Feature)! This is a bug. Please report it!";
        return INTERNAL_ERROR;
      }
      if (fm.size() > 0) EDTAFile().store(out, fm);
      else if (cm.size() > 0) EDTAFile().store(out, cm);
    }
    else if (out_type == FileTypes::CSV)
    {
      // as ibspectra is currently the only csv/text based format we assume
      // that out_type == FileTypes::CSV means ibspectra, if more formats
      // are added we need a more intelligent strategy to decide which
      // conversion is requested

      // IBSpectra selected as output type
      if (in_type != FileTypes::CONSENSUSXML)
      {
        LOG_ERROR << "Incompatible input data: FileConverter can only convert consensusXML files to ibspectra format.";
        return INCOMPATIBLE_INPUT_DATA;
      }

      IBSpectraFile ibfile;
      ibfile.store(out, cm);
    }
    else
    {
      writeLog_("Unknown output file type given. Aborting!");
      printUsage_();
      return ILLEGAL_PARAMETERS;
    }

    return EXECUTION_OK;
  }