Example #1
0
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in                   = getStringOption_("in");
    String out                  = getStringOption_("out");
    String target_run           = getStringOption_("name");
    String target_file          = getStringOption_("run");
    StringList qp_accs          = getStringList_("qp_accessions");

    //-------------------------------------------------------------
    // reading input
    //------------------------------------------------------------
    if (target_file != "")
    {
      target_run = QFileInfo(QString::fromStdString(target_file)).baseName();
    }

    //~ !getFlag_("tables")

    QcMLFile qcmlfile;
    qcmlfile.load(in);

    if (qp_accs.empty())
    {
      qp_accs.push_back("QC:0000044");
      qp_accs.push_back("QC:0000047");
      qp_accs.push_back("QC:0000022");
      qp_accs.push_back("QC:0000038");
      qp_accs.push_back("QC:0000049");
    }

    //TODO care for QualityParameter s
    if (target_run == "")
    {
      for (Size i = 0; i < qp_accs.size(); ++i)
      {
        qcmlfile.removeAllAttachments(qp_accs[i]);
      }
    }
    else
    {
      for (Size i = 0; i < qp_accs.size(); ++i)
      {
        qcmlfile.removeAttachment(target_run,qp_accs[i]);
      }
    }

    qcmlfile.store(out);
    return EXECUTION_OK;
  }
Example #2
0
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in                   = getStringOption_("in");
    String out                 = getStringOption_("out");
    String target_qp       = getStringOption_("qp_att_acc");
    String target_acc      = getStringOption_("cv_acc");
    String target_run      = getStringOption_("name");
    String target_file       = getStringOption_("run");
    String plot_file          = getStringOption_("plot");
    String tab                 = getStringOption_("table");

    //-------------------------------------------------------------
    // fetch vocabularies
    //------------------------------------------------------------    
    ControlledVocabulary cv;
    cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
    cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo"));
    
    //-------------------------------------------------------------
    // reading input
    //------------------------------------------------------------
    if (target_file != "")
    {
      target_run = QFileInfo(QString::fromStdString(target_file)).baseName();
    }

    QcMLFile qcmlfile;
    if (in != "")
    {
      qcmlfile.load(in);
    }

    if (target_run == "")
    {
      //~ check if only one run in file
      std::vector<String> nas;
      qcmlfile.getRunNames(nas);
      if (nas.size() == 1)
      {
        target_run = nas.front();
      }
      else
      {
        cerr << "Error: You have to give at least one of the following parameter (in ascending precedence): name, run. Aborting!" << endl;
        return ILLEGAL_PARAMETERS;
      }
    }    

    QFile f(plot_file.c_str());
    String plot_b64;
    if (f.open(QIODevice::ReadOnly))
    {
      QByteArray ba = f.readAll();
      f.close();
      plot_b64 = String(QString(ba.toBase64()));
    }
    
    QcMLFile::Attachment at;
    at.cvAcc = target_acc;
    at.id = String(UniqueIdGenerator::getUniqueId());
    at.cvRef = "QC"; //TODO assign right cv reference

    if (plot_b64 != "" || tab != "")
    {
      if (plot_b64 != "")
      {
        try
        {
          const ControlledVocabulary::CVTerm& term = cv.getTerm(target_acc);
          at.name = term.name; ///< Name          
          //~ at.unitRef; //TODO MIME type
          //~ at.unitAcc;
        }
        catch (...)
        {
          cerr << "Error: You have to give the accession of a existing cv term. Aborting!" << endl;
          return ILLEGAL_PARAMETERS;
        }
        at.binary = plot_b64;
      }
      else if (tab != "")
      {        
        try
        {
          const ControlledVocabulary::CVTerm& term = cv.getTerm(target_acc);
          at.name = term.name; ///< Name
          //~ at.unitRef; //TODO MIME type
          //~ at.unitAcc;
        }
        catch (...)
        {
          cerr << "Error: You have to give the accession of a existing cv term. Aborting!" << endl;
          return ILLEGAL_PARAMETERS;
        }
        
        CsvFile csv_file(tab);
        if (csv_file.size()>1)
        {
          StringList li;
          csv_file.getRow(0, li);
          for (Size i = 0; i < li.size(); ++i)
          {
            at.colTypes.push_back(li[i]);
          }
          for (UInt i = 1; i < csv_file.size(); ++i)
          {
            StringList li;
            std::vector<String> v;
            csv_file.getRow(i, li);
            //TODO throw error if li.size() != at.colTypes.size()
            for (Size i = 0; i < li.size(); ++i)
            {
              v.push_back(li[i]);
            }
            at.tableRows.push_back(v);
          }
        }
      }
      else
      {
        cerr << "Error: Nothing valid to attach. Aborting!" << endl;
        return ILLEGAL_PARAMETERS;
      }

      std::vector<String> ids;
      qcmlfile.existsRunQualityParameter(target_run, target_qp, ids);
      if (!ids.empty())
      {
        at.qualityRef = ids.front();
        qcmlfile.addRunAttachment(target_run, at);
      }
      else
      {
        qcmlfile.existsSetQualityParameter(target_run, target_qp, ids);
        if (!ids.empty())
        {
          at.qualityRef = ids.front();
          qcmlfile.addSetAttachment(target_run, at);
        }
        else
        {
          cerr << "Error: You have to give the accession of a existing cv term to attacht to. Aborting!" << endl;
          return ILLEGAL_PARAMETERS;
        }
      }
    }
    qcmlfile.store(out);
    return EXECUTION_OK;
  }
  ExitCodes main_(int, const char**)
  {
    vector<ProteinIdentification> prot_ids;
    vector<PeptideIdentification> pep_ids;
    ProteinHit temp_protein_hit;

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String inputfile_id               = getStringOption_("id");
    String inputfile_feature       = getStringOption_("feature");
    String inputfile_consensus  = getStringOption_("consensus");
    String inputfile_raw            = getStringOption_("in");
    String outputfile_name       = getStringOption_("out");

    //~ bool Ms1(getFlag_("MS1"));
    //~ bool Ms2(getFlag_("MS2"));
    bool remove_duplicate_features(getFlag_("remove_duplicate_features"));
    
    //-------------------------------------------------------------
    // fetch vocabularies
    //------------------------------------------------------------
    ControlledVocabulary cv;
    cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
    cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo"));
 
     QcMLFile qcmlfile;

    //-------------------------------------------------------------
    // MS  aqiusition
    //------------------------------------------------------------
    String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName();

    cout << "Reading mzML file..." << endl;
    MzMLFile mz_data_file;
    MSExperiment<Peak1D> exp;
    MzMLFile().load(inputfile_raw, exp);
    
    //---prep input
    exp.sortSpectra();
    UInt min_mz = std::numeric_limits<UInt>::max();
    UInt max_mz = 0;
    std::map<Size, UInt> mslevelcounts;
    
    qcmlfile.registerRun(base_name,base_name); //TODO use UIDs
    
    //---base MS aquisition qp
    String msaq_ref = base_name + "_msaq";
    QcMLFile::QualityParameter qp;
    qp.id = msaq_ref; ///< Identifier
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000004";
    try
    {
      //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details");
      //~ cout << test.name << test.id << endl;
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004");
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "mzML file"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    //---file origin qp
    qp = QcMLFile::QualityParameter();
    qp.name = "mzML file"; ///< Name
    qp.id = base_name + "_run_name"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000577";
    qp.value = base_name;
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    qp = QcMLFile::QualityParameter();
    qp.name = "instrument model"; ///< Name
    qp.id = base_name + "_instrument_name"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000031";
    qp.value = exp.getInstrument().getName();
    qcmlfile.addRunQualityParameter(base_name, qp);    

    qp = QcMLFile::QualityParameter();
    qp.name = "completion time"; ///< Name
    qp.id = base_name + "_date"; ///< Identifier
    qp.cvRef = "MS"; ///< cv reference
    qp.cvAcc = "MS:1000747";
    qp.value = exp.getDateTime().getDate();
    qcmlfile.addRunQualityParameter(base_name, qp);

    //---precursors at
    QcMLFile::Attachment at;
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000044";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_precursors"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "precursors"; ///< Name
    }

    at.colTypes.push_back("MS:1000894_[sec]"); //RT
    at.colTypes.push_back("MS:1000040"); //MZ
    for (Size i = 0; i < exp.size(); ++i)
    {
      mslevelcounts[exp[i].getMSLevel()]++;
      if (exp[i].getMSLevel() == 2)
      {
        if (exp[i].getPrecursors().front().getMZ() < min_mz)
        {
          min_mz = exp[i].getPrecursors().front().getMZ();
        }
        if (exp[i].getPrecursors().front().getMZ() > max_mz)
        {
          max_mz = exp[i].getPrecursors().front().getMZ();
        }
        std::vector<String> row;
        row.push_back(exp[i].getRT());
        row.push_back(exp[i].getPrecursors().front().getMZ());
        at.tableRows.push_back(row);
      }
    }
    qcmlfile.addRunAttachment(base_name, at);

    //---aquisition results qp
    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_ms1aquisition"; ///< Identifier
    qp.value = String(mslevelcounts[1]);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of ms1 spectra"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    

    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_ms2aquisition"; ///< Identifier
    qp.value = String(mslevelcounts[2]);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of ms2 spectra"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);

    qp = QcMLFile::QualityParameter();
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results"
    qp.id = base_name + "_Chromaquisition"; ///< Identifier
    qp.value = String(exp.getChromatograms().size());
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "number of chromatograms"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);
    
    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000009";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_mzrange"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS MZ aquisition ranges"; ///< Name
    }

    at.colTypes.push_back("QC:0000010"); //MZ
    at.colTypes.push_back("QC:0000011"); //MZ
    std::vector<String> rowmz;
    rowmz.push_back(String(min_mz));
    rowmz.push_back(String(max_mz));
    at.tableRows.push_back(rowmz);
    qcmlfile.addRunAttachment(base_name, at);

    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000012";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_rtrange"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS RT aquisition ranges"; ///< Name
    }

    at.colTypes.push_back("QC:0000013"); //MZ
    at.colTypes.push_back("QC:0000014"); //MZ
    std::vector<String> rowrt;
    rowrt.push_back(String(exp.begin()->getRT()));
    rowrt.push_back(String(exp.getSpectra().back().getRT()));
    at.tableRows.push_back(rowrt);
    qcmlfile.addRunAttachment(base_name, at);
    

    //---ion current stability ( & tic ) qp
    at = QcMLFile::Attachment();
    at.cvRef = "QC"; ///< cv reference
    at.cvAcc = "QC:0000022";
    at.qualityRef = msaq_ref;
    at.id = base_name + "_tics"; ///< Identifier
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
      at.name = term.name; ///< Name
    }
    catch (...)
    {
      at.name = "MS TICs"; ///< Name
    }
    
    at.colTypes.push_back("MS:1000894_[sec]");
    at.colTypes.push_back("MS:1000285");
    UInt max = 0;
    Size below_10k = 0;
    for (Size i = 0; i < exp.size(); ++i)
    {
      if (exp[i].getMSLevel() == 1)
      {
        UInt sum = 0;
        for (Size j = 0; j < exp[i].size(); ++j)
        {
          sum += exp[i][j].getIntensity();
        }
        if (sum > max)
        {
          max = sum;
        }
        if (sum < 10000)
        {
          ++below_10k;
        }
        std::vector<String> row;
        row.push_back(exp[i].getRT());
        row.push_back(sum);
        at.tableRows.push_back(row);
      }
    }
    qcmlfile.addRunAttachment(base_name, at);
    

    qp = QcMLFile::QualityParameter();
    qp.id = base_name + "_ticslump"; ///< Identifier
    qp.cvRef = "QC"; ///< cv reference
    qp.cvAcc = "QC:0000023";
    qp.value = String((100 / exp.size()) * below_10k);
    try
    {
      const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
      qp.name = term.name; ///< Name
    }
    catch (...)
    {
      qp.name = "percentage of tic slumps"; ///< Name
    }
    qcmlfile.addRunQualityParameter(base_name, qp);

    
    //-------------------------------------------------------------
    // MS  id
    //------------------------------------------------------------
    if (inputfile_id != "")
    {
      IdXMLFile().load(inputfile_id, prot_ids, pep_ids);
      cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl;

      ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters();
      vector<String> var_mods = params.variable_modifications;
      //~ boost::regex re("(?<=[KR])(?=[^P])");
     
      String msid_ref = base_name + "_msid";
      QcMLFile::QualityParameter qp;
      qp.id = msid_ref; ///< Identifier
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000025";
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "MS identification result details"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000026";
      at.qualityRef = msid_ref;
      at.id = base_name + "_idsetting"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "MS id settings"; ///< Name
      }
      
      at.colTypes.push_back("MS:1001013"); //MS:1001013 db name  MS:1001016 version  MS:1001020 taxonomy
      at.colTypes.push_back("MS:1001016");
      at.colTypes.push_back("MS:1001020");
      std::vector<String> row;
      row.push_back(String(prot_ids.front().getSearchParameters().db));
      row.push_back(String(prot_ids.front().getSearchParameters().db_version));
      row.push_back(String(prot_ids.front().getSearchParameters().taxonomy));
      at.tableRows.push_back(row);
      qcmlfile.addRunAttachment(base_name, at);


      UInt spectrum_count = 0;
      Size peptide_hit_count = 0;
      UInt runs_count = 0;
      Size protein_hit_count = 0;
      set<String> peptides;
      set<String> proteins;
      Size missedcleavages = 0;
      for (Size i = 0; i < pep_ids.size(); ++i)
      {
        if (!pep_ids[i].empty())
        {
          ++spectrum_count;
          peptide_hit_count += pep_ids[i].getHits().size();
          const vector<PeptideHit>& temp_hits = pep_ids[i].getHits();
          for (Size j = 0; j < temp_hits.size(); ++j)
          {
            peptides.insert(temp_hits[j].getSequence().toString());
          }
        }
      }
      for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it)
      {
        for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st)
        {
          if (*st == 'K' || *st == 'R')
          {
            ++missedcleavages;
          }
        }
      }

      for (Size i = 0; i < prot_ids.size(); ++i)
      {
        ++runs_count;
        protein_hit_count += prot_ids[i].getHits().size();
        const vector<ProteinHit>& temp_hits = prot_ids[i].getHits();
        for (Size j = 0; j < temp_hits.size(); ++j)
        {
          proteins.insert(temp_hits[j].getAccession());
        }
      }
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000037"; ///< cv accession
      qp.id = base_name + "_misscleave"; ///< Identifier
      qp.value = missedcleavages;
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "total number of missed cleavages"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);

      
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000032"; ///< cv accession
      qp.id = base_name + "_totprot"; ///< Identifier
      qp.value = protein_hit_count;
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
        qp.name = "total number of identified proteins"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000033"; ///< cv accession
      qp.id = base_name + "_totuniqprot"; ///< Identifier
      qp.value = String(proteins.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of uniquely identified proteins"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000029"; ///< cv accession
      qp.id = base_name + "_psms"; ///< Identifier
      qp.value = String(spectrum_count);
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of PSM"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000030"; ///< cv accession
      qp.id = base_name + "_totpeps"; ///< Identifier
      qp.value = String(peptide_hit_count);
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of identified peptides"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000031"; ///< cv accession
      qp.id = base_name + "_totuniqpeps"; ///< Identifier
      qp.value = String(peptides.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "total number of uniquely identified peptides"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000038";
      at.qualityRef = msid_ref;
      at.id = base_name + "_massacc"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "delta ppm tables";
      }
      
      //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M)
      at.colTypes.push_back("RT");
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("Score");
      at.colTypes.push_back("PeptideSequence");
      at.colTypes.push_back("Charge");
      at.colTypes.push_back("TheoreticalWeight");
      at.colTypes.push_back("delta_ppm");
      for (UInt w = 0; w < var_mods.size(); ++w)
      {
        at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_'));
      }

      std::vector<double> deltas;
      //~ prot_ids[0].getSearchParameters();
      for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it)
      {
        if (it->getHits().size() > 0)
        {
          std::vector<String> row;
          row.push_back(it->getRT());
          row.push_back(it->getMZ());
          PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort
          vector<UInt> pep_mods;
          for (UInt w = 0; w < var_mods.size(); ++w)
          {
            pep_mods.push_back(0);
          }
          for (AASequence::ConstIterator z =  tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z)
          {
            Residue res = *z;
            String temp;
            if (res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl")
            {
              temp = res.getModification() + " (" + res.getOneLetterCode()  + ")";
              //cout<<res.getModification()<<endl;
              for (UInt w = 0; w < var_mods.size(); ++w)
              {
                if (temp == var_mods[w])
                {
                  //cout<<temp;
                  pep_mods[w] += 1;
                }
              }
            }
          }
          row.push_back(tmp.getScore());
          row.push_back(tmp.getSequence().toString().removeWhitespaces());
          row.push_back(tmp.getCharge());
          row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()));
          double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true));
          row.push_back(String(dppm));
          deltas.push_back(dppm);
          for (UInt w = 0; w < var_mods.size(); ++w)
          {
            row.push_back(pep_mods[w]);
          }
          at.tableRows.push_back(row);
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
      

      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000040"; ///< cv accession
      qp.id = base_name + "_mean_delta"; ///< Identifier
      qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end()));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "mean delta ppm"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000041"; ///< cv accession
      qp.id = base_name + "_median_delta"; ///< Identifier
      qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "median delta ppm"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);


      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000035"; ///< cv accession
      qp.id = base_name + "_ratio_id"; ///< Identifier
      qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2]));
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "id ratio"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);
    }

    //-------------------------------------------------------------
    // MS quantitation
    //------------------------------------------------------------
    FeatureMap map;
    String msqu_ref = base_name + "_msqu";
    if (inputfile_feature != "")
    {
      FeatureXMLFile f;
      f.load(inputfile_feature, map);

      cout << "Read featureXML file..." << endl;

      //~ UInt fiter = 0;
      map.sortByRT();
      map.updateRanges();

      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000045"; ///< cv accession
      qp.id = msqu_ref; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "MS quantification result details"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);
      
      qp = QcMLFile::QualityParameter();
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:0000046"; ///< cv accession
      qp.id = base_name + "_feature_count"; ///< Identifier
      qp.value = String(map.size());
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc);
        qp.name = term.name; ///< Name
      }
      catch (...)
      {
         qp.name = "number of features"; ///< Name
      }
      qcmlfile.addRunQualityParameter(base_name, qp);      
    }

    if (inputfile_feature != "" && !remove_duplicate_features)
    {
      
      QcMLFile::Attachment at;
      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000047";
      at.qualityRef = msqu_ref;
      at.id = base_name + "_features"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "features"; ///< Name
      }
      
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("RT");
      at.colTypes.push_back("Intensity");
      at.colTypes.push_back("Charge");
      at.colTypes.push_back("Quality");
      at.colTypes.push_back("FWHM");
      at.colTypes.push_back("IDs");
      UInt fiter = 0;
      map.sortByRT();
      //ofstream out(outputfile_name.c_str());
      while (fiter < map.size())
      {
        std::vector<String> row;
        row.push_back(map[fiter].getMZ());
        row.push_back(map[fiter].getRT());
        row.push_back(map[fiter].getIntensity());
        row.push_back(map[fiter].getCharge());
        row.push_back(map[fiter].getOverallQuality());
        row.push_back(map[fiter].getWidth());
        row.push_back(map[fiter].getPeptideIdentifications().size());
        fiter++;
        at.tableRows.push_back(row);
      }     
      qcmlfile.addRunAttachment(base_name, at);
    }
    else if (inputfile_feature != "" && remove_duplicate_features)
    {
      QcMLFile::Attachment at;
      at = QcMLFile::Attachment();
      at.cvRef = "QC"; ///< cv reference
      at.cvAcc = "QC:0000047";
      at.qualityRef = msqu_ref;
      at.id = base_name + "_features"; ///< Identifier
      try
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc);
        at.name = term.name; ///< Name
      }
      catch (...)
      {
        at.name = "features"; ///< Name
      }
      
      at.colTypes.push_back("MZ");
      at.colTypes.push_back("RT");
      at.colTypes.push_back("Intensity");
      at.colTypes.push_back("Charge");
      FeatureMap map, map_out;
      FeatureXMLFile f;
      f.load(inputfile_feature, map);
      UInt fiter = 0;
      map.sortByRT();
      while (fiter < map.size())
      {
        FeatureMap map_tmp;
        for (UInt k = fiter; k <= map.size(); ++k)
        {
          if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1)
          {
            //~ cout << fiter << endl;
            map_tmp.push_back(map[k]);
          }
          else
          {
            fiter = k;
            break;
          }
        }
        map_tmp.sortByMZ();
        UInt retif = 1;
        map_out.push_back(map_tmp[0]);
        while (retif < map_tmp.size())
        {
          if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01)
          {
            cout << "equal RT, but mass different" << endl;
            map_out.push_back(map_tmp[retif]);
          }
          retif++;
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
    }
    if (inputfile_consensus != "")
    {
      cout << "Reading consensusXML file..." << endl;
      ConsensusXMLFile f;
      ConsensusMap map;
      f.load(inputfile_consensus, map);
      //~ String CONSENSUS_NAME = "_consensus.tsv";
      //~ String combined_out = outputfile_name + CONSENSUS_NAME;
      //~ ofstream out(combined_out.c_str());

      at = QcMLFile::Attachment();
      qp.name = "consensuspoints"; ///< Name
      //~ qp.id = base_name + "_consensuses"; ///< Identifier
      qp.cvRef = "QC"; ///< cv reference
      qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results"

      at.colTypes.push_back("Native_spectrum_ID");
      at.colTypes.push_back("DECON_RT_(sec)");
      at.colTypes.push_back("DECON_MZ_(Th)");
      at.colTypes.push_back("DECON_Intensity");
      at.colTypes.push_back("Feature_RT_(sec)");
      at.colTypes.push_back("Feature_MZ_(Th)");
      at.colTypes.push_back("Feature_Intensity");
      at.colTypes.push_back("Feature_Charge");
      for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit)
      {
        const ConsensusFeature& CF = *cmit;
        for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit)
        {
          std::vector<String> row;
          FeatureHandle FH = *cfit;
          row.push_back(CF.getMetaValue("spectrum_native_id"));
          row.push_back(CF.getRT()); row.push_back(CF.getMZ());
          row.push_back(CF.getIntensity());
          row.push_back(FH.getRT());
          row.push_back(FH.getMZ());
          row.push_back(FH.getCharge());
          at.tableRows.push_back(row);
        }
      }
      qcmlfile.addRunAttachment(base_name, at);
    }
    
    
    //-------------------------------------------------------------
    // finalize
    //------------------------------------------------------------
    qcmlfile.store(outputfile_name);
    return EXECUTION_OK;
  }
Example #4
0
  ExitCodes main_(int, const char **)
  {
    String plot_file = "";
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    StringList in_files     = getStringList_("in");
    String out              = getStringOption_("out");
    String setname          = getStringOption_("setname");

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    QcMLFile qcmlfile;
    for (Size i = 0; i < in_files.size(); ++i)
    {
      QcMLFile tmpfile;
      tmpfile.load(in_files[i]);
      qcmlfile.merge(tmpfile,setname);
    }

    // make #ms2 set stats
    std::vector<String> ms2nums_strings;
    qcmlfile.collectSetParameter(setname,"QC:0000015", ms2nums_strings);
    std::vector<Int> ms2nums;
    for (std::vector<String>::iterator it = ms2nums_strings.begin(); it != ms2nums_strings.end(); ++it) //transform is too ugly and errorprone
    {
      ms2nums.push_back(it->toInt());
    }

    std::sort(ms2nums.begin(), ms2nums.end());

    if (ms2nums.size()>0)
    {
      std::map<String,String> nums;
      std::map<String,String> nams;
      //~ min,q1,q2,q3,max
      nums["QC:0000043"] = String(ms2nums.front());
      nams["QC:0000043"] = "min ms2 number";
      nums["QC:0000044"] = String(OpenMS::Math::quantile(ms2nums.begin(), ms2nums.end(),25));
      nams["QC:0000044"] = "Q1 ms2 number";
      nums["QC:0000045"] = String(OpenMS::Math::quantile(ms2nums.begin(), ms2nums.end(),50));
      nams["QC:0000045"] = "Q2 ms2 number";
      nums["QC:0000046"] = String(OpenMS::Math::quantile(ms2nums.begin(), ms2nums.end(),75));
      nams["QC:0000046"] = "Q3 ms2 number";
      nums["QC:0000047"] = String(ms2nums.back());
      nams["QC:0000047"] = "max ms2 number";

      addBoxPlotQPs(nums, nams, setname, qcmlfile);
    }

    // make #id set stats
    std::vector<String> idnums_strings;
    qcmlfile.collectSetParameter(setname,"QC:0000020", idnums_strings);
    std::vector<Int> idnums;
    for (std::vector<String>::iterator it = idnums_strings.begin(); it != idnums_strings.end(); ++it) //transform is too ugly and errorprone
    {
      idnums.push_back(it->toInt());
    }

    std::sort(idnums.begin(), idnums.end());

    if (idnums.size()>0)
    {
      std::map<String,String> nums;
      std::map<String,String> nams;
      //~ min,q1,q2,q3,max

      nums["QC:0000053"] = String(idnums.front());
      nams["QC:0000053"] = "min id numbers";
      nums["QC:0000054"] = String(OpenMS::Math::quantile(idnums.begin(), idnums.end(),25));
      nams["QC:0000054"] = "Q1 id numbers";
      nums["QC:0000055"] = String(OpenMS::Math::quantile(idnums.begin(), idnums.end(),50));
      nams["QC:0000055"] = "Q2 id numbers";
      nums["QC:0000056"] = String(OpenMS::Math::quantile(idnums.begin(), idnums.end(),75));
      nams["QC:0000056"] = "Q3 id numbers";
      nums["QC:0000057"] = String(idnums.back());
      nams["QC:0000057"] = "max id number";

      addBoxPlotQPs(nums, nams, setname, qcmlfile);
    }

    qcmlfile.store(out);
    return EXECUTION_OK;
  }
Example #5
0
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String in = getStringOption_("in");
    String out = getStringOption_("out");
    String mappi = getStringOption_("mapping");
    String tab = getStringOption_("table");
    
    ControlledVocabulary cv;
    cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
    cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo"));
    
    //-------------------------------------------------------------
    // reading input
    //------------------------------------------------------------
    QcMLFile qcmlfile;
    if (in != "")
    {
      qcmlfile.load(in);
    }
        
    if (mappi != "" && tab != "")
    {
      CsvFile csv_file(tab);
      CsvFile map_file(mappi);
      
      if (map_file.size()<2) //assumed that first row is the header of table and second row is the according qc
      {
        cerr << "Error: You have to give a mapping of your table (first row is the header of table and second row is the according qc). Aborting!" << endl;
        return ILLEGAL_PARAMETERS;
      }
      StringList header,according;
      map_file.getRow(0, header);
      map_file.getRow(1, according);
      
      if (header.size() != according.size())
      {
        cerr << "Error: You have to give a mapping of your table (first row is the header of table and second row is the according qc). Aborting!" << endl;
        return ILLEGAL_PARAMETERS;
      }
      //~ std::map<String,String> mapping;
      //~ std::transform( header.begin(), header.end(), according.begin(), std::inserter(mapping, mapping.end() ), std::make_pair<String,String> );
      int runset_col = -1;
      for (Size i = 0; i < according.size(); ++i)
      {
        if (!cv.exists(according[i]))
        {
          try
          {
            const ControlledVocabulary::CVTerm& term = cv.getTermByName(according[i]);
            header[i] = term.name;
            according[i] = term.id;
          }
          catch (...)
          {
            cerr << "Error: You have to specify a correct cv with accession or name in col "<< String(i) <<". Aborting!" << endl;
            //~ cerr << "Header was: "<< header[i] << " , according value was: " << according[i] << endl;
            return ILLEGAL_PARAMETERS;
          }
        }
        else
        {
          const ControlledVocabulary::CVTerm& term = cv.getTerm(according[i]);
          header[i] = term.name;
        }
        if (header[i] == "raw data file") //TODO add set name as possibility!
        {
          runset_col = i;
        }
      }
      if (runset_col < 0)
      {
        cerr << "Error: You have to give a mapping of your table - rows to runs/sets. Aborting!" << endl;
        return ILLEGAL_PARAMETERS;
      }

      if (csv_file.size()>1)
      {
        StringList li;
        for (Size i = 1; i < csv_file.size(); ++i)
        {
          StringList li;
          csv_file.getRow(i, li);
          if (li.size() < according.size())
          {
            cerr << "Error: You have to give a correct mapping of your table - row " << String(i+1) <<" is too short. Aborting!" << endl;
            return ILLEGAL_PARAMETERS;
          }
          
          std::vector< QcMLFile::QualityParameter > qps;
          String id;
          bool set = false;
          for (Size j = 0; j < li.size(); ++j)
          {
            if (j==runset_col)
            {
              if (qcmlfile.existsRun(li[j])) //TODO this only works for real run IDs
              {
                id = li[j];
              }
              else if (qcmlfile.existsSet(li[j])) //TODO this only works for real set IDs
              {
                id = li[j];
                set = true;
              }
              else
              {
                id = li[j];
                qcmlfile.registerRun(id,id);
                //TODO warn that if this was supposed to be a set - now it is not!
              }
            }
            QcMLFile::QualityParameter def;
            def.name = header[j]; ///< Name
            def.id = String(UniqueIdGenerator::getUniqueId());
            def.cvRef = "QC"; ///< cv reference ('full name')
            def.cvAcc = according[j];
            def.value = li[j];
            qps.push_back(def);
          }
          if (id!="")
          {
            for (std::vector<QcMLFile::QualityParameter>::const_iterator qit = qps.begin(); qit != qps.end(); ++qit)
            {
              if (!set)
              {
                qcmlfile.addRunQualityParameter(id, *qit);
              }
              else
              {
                qcmlfile.addSetQualityParameter(id, *qit);
              }
            }
          }
        }
      }
    }
    qcmlfile.store(out);
    return EXECUTION_OK;
  }