Exemple #1
0
  /// Tool entry point: digests every protein of the input FASTA file into peptides.
  ///
  /// Options read: "in", "out", "out_type", "FASTA:ID", "FASTA:description",
  /// "min_length", "max_length", "missed_cleavages" and "enzyme".
  ///
  /// Proteins are streamed from the input one entry at a time. If the output
  /// type is FASTA, each peptide is written immediately as a FASTA entry;
  /// otherwise protein and peptide hits are collected and stored through
  /// IdXMLFile once all input has been processed. A statistics summary is
  /// logged before returning.
  ///
  /// @return PARSE_ERROR if the output type is UNKNOWN both from "out_type" and
  ///         from the output file name; EXECUTION_OK otherwise.
  ExitCodes main_(int, const char**) override
  {
    // The start time of the run is part of the identifier that links the
    // peptide identifications to the single protein identification run.
    DateTime date_time = DateTime::now();
    String date_time_string = date_time.get();
    const String run_identifier = "In-silico_digestion" + date_time_string;

    // Exactly one protein identification run is produced.
    vector<ProteinIdentification> protein_identifications(1);
    ProteinIdentification& digestion_run = protein_identifications.front();

    vector<PeptideIdentification> identifications;
    PeptideIdentification peptide_identification;
    peptide_identification.setIdentifier(run_identifier);

    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    String inputfile_name = getStringOption_("in");
    String outputfile_name = getStringOption_("out");

    // "parent" and "number" are matched explicitly; any other value selects BOTH
    const String fasta_id_option = getStringOption_("FASTA:ID");
    FASTAID FASTA_ID = BOTH;
    if (fasta_id_option == "parent")
    {
      FASTA_ID = PARENT;
    }
    else if (fasta_id_option == "number")
    {
      FASTA_ID = NUMBER;
    }
    bool keep_FASTA_desc = (getStringOption_("FASTA:description") == "keep");

    // output file type: explicit option first, file name as fallback
    FileHandler fh;
    FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

    if (out_type == FileTypes::UNKNOWN)
    {
      out_type = fh.getTypeByFileName(outputfile_name);
      writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 2);
    }

    if (out_type == FileTypes::UNKNOWN)
    {
      LOG_ERROR << ("Error: Could not determine output file type!") << std::endl;
      return PARSE_ERROR;
    }

    Size min_size = getIntOption_("min_length");
    Size max_size = getIntOption_("max_length");
    Size missed_cleavages = getIntOption_("missed_cleavages");

    const bool has_FASTA_output = (out_type == FileTypes::FASTA);

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------
    FASTAFile ff;
    ff.readStart(inputfile_name);
    if (has_FASTA_output)
    {
      ff.writeStart(outputfile_name);
    }

    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------

    // This should be updated if more cleavage enzymes are available
    ProteinIdentification::SearchParameters search_parameters;
    String enzyme = getStringOption_("enzyme");
    ProteaseDigestion digestor;
    digestor.setEnzyme(enzyme);
    digestor.setMissedCleavages(missed_cleavages);
    search_parameters.digestion_enzyme = *ProteaseDB::getInstance()->getEnzyme(enzyme);

    digestion_run.setSearchParameters(search_parameters);
    digestion_run.setDateTime(date_time);
    digestion_run.setSearchEngine("In-silico digestion");
    digestion_run.setIdentifier(run_identifier);

    // Reused for every peptide; the evidence is refreshed once per protein.
    PeptideHit temp_peptide_hit;
    PeptideEvidence temp_pe;

    Size dropped_by_length(0); // stats for removing candidates
    Size fasta_out_count(0);

    FASTAFile::FASTAEntry fe;
    while (ff.readNext(fe))
    {
      if (!has_FASTA_output)
      {
        // register the parent protein and let the peptide hits point to it
        ProteinHit temp_protein_hit;
        temp_protein_hit.setSequence(fe.sequence);
        temp_protein_hit.setAccession(fe.identifier);
        digestion_run.insertHit(temp_protein_hit);
        temp_pe.setProteinAccession(fe.identifier);
        temp_peptide_hit.setPeptideEvidences(vector<PeptideEvidence>(1, temp_pe));
      }

      vector<AASequence> current_digest;
      if (enzyme == "none")
      {
        // no digestion: the protein sequence is passed through as is,
        // without going through the digestor and its length limits
        current_digest.push_back(AASequence::fromString(fe.sequence));
      }
      else
      {
        // the return value of digest() is accumulated as the number of
        // candidates removed by the length restrictions
        dropped_by_length += digestor.digest(AASequence::fromString(fe.sequence), current_digest, min_size, max_size);
      }

      // stays equal to the parent identifier when FASTA_ID is PARENT
      String id = fe.identifier;
      for (auto const& s : current_digest)
      {
        if (!has_FASTA_output)
        {
          // one PeptideIdentification per peptide, each holding a single hit
          temp_peptide_hit.setSequence(s);
          peptide_identification.insertHit(temp_peptide_hit);
          identifications.push_back(peptide_identification);
          peptide_identification.setHits(std::vector<PeptideHit>()); // clear
        }
        else // for FASTA file output
        {
          // the counter runs over all peptides written so far, not per protein
          ++fasta_out_count;
          switch (FASTA_ID)
          {
            case PARENT: break;
            case NUMBER: id = String(fasta_out_count); break;
            case BOTH: id = fe.identifier + "_" + String(fasta_out_count); break;
          }
          ff.writeNext(FASTAFile::FASTAEntry(id, keep_FASTA_desc ? fe.description : "", s.toString()));
        }
      }
    }

    //-------------------------------------------------------------
    // writing output
    //-------------------------------------------------------------

    if (has_FASTA_output)
    {
      ff.writeEnd();
    }
    else
    {
      IdXMLFile().store(outputfile_name,
                        protein_identifications,
                        identifications);
    }

    Size pep_remaining_count = (has_FASTA_output ? fasta_out_count : identifications.size());
    LOG_INFO << "Statistics:\n"
             << "  file:                                    " << inputfile_name << "\n"
             << "  total #peptides after digestion:         " << pep_remaining_count + dropped_by_length << "\n"
             << "  removed #peptides (length restrictions): " << dropped_by_length << "\n"
             << "  remaining #peptides:                     " << pep_remaining_count << std::endl;

    return EXECUTION_OK;
  }
  /// Tool entry point: writes a FASTA file containing decoy versions of all
  /// proteins found in the input FASTA files.
  ///
  /// Options read: "in" (list of input files), "out", the flag "only_decoy",
  /// "method", "decoy_string" and "decoy_string_position".
  ///
  /// For every input protein the original entry is written first (unless
  /// "only_decoy" is set), followed by its decoy. The decoy identifier comes
  /// from getIdentifier_(); the decoy sequence is a random permutation of the
  /// original if "method" is "shuffle", otherwise the reversed sequence.
  /// Identifiers that occur more than once across all inputs trigger a
  /// warning, but the entries are still processed.
  ///
  /// @return EXECUTION_OK
  ExitCodes main_(int, const char **)
  {
    //-------------------------------------------------------------
    // parsing parameters
    //-------------------------------------------------------------
    StringList in(getStringList_("in"));
    String out(getStringOption_("out"));
    bool append = (!getFlag_("only_decoy"));
    bool shuffle = (getStringOption_("method") == "shuffle");
    String decoy_string(getStringOption_("decoy_string"));
    bool decoy_string_position_prefix = (String(getStringOption_("decoy_string_position")) == "prefix");

    //-------------------------------------------------------------
    // reading input
    //-------------------------------------------------------------

    if (in.size() == 1)
    {
      LOG_WARN << "Warning: Only one FASTA input file was provided, which might not contain contaminants. You probably want to have them! Just add the contaminant file to the input file list 'in'." << endl;
    }

    if (shuffle)
    {
      // Seed once per run. Re-seeding with time(0) for every protein would
      // restart the same pseudo-random sequence for all proteins handled
      // within the same second.
      srand(static_cast<unsigned int>(time(0)));
    }

    set<String> identifiers; // spot duplicate identifiers  // std::unordered_set<string> has slightly more RAM, but slightly less CPU

    FASTAFile f;
    f.writeStart(out);
    FASTAFile::FASTAEntry protein;

    for (Size i = 0; i < in.size(); ++i)
    {
      f.readStart(in[i]);

      //-------------------------------------------------------------
      // calculations
      //-------------------------------------------------------------
      while (f.readNext(protein))
      {
        // insert() reports through .second whether the identifier was new
        if (!identifiers.insert(protein.identifier).second)
        {
          LOG_WARN << "DecoyDatabase: Warning, identifier '" << protein.identifier << "' occurs more than once!" << endl;
        }

        if (append)
        {
          // keep the target entry in front of its decoy
          f.writeNext(protein);
        }

        // identifier
        protein.identifier = getIdentifier_(protein.identifier, decoy_string, decoy_string_position_prefix);

        // sequence
        if (shuffle)
        {
          // Draw residues without replacement: pick a random position among
          // the first 'remaining' characters, append it to the result, then
          // move the last not-yet-drawn character into the freed slot.
          String shuffled;
          Size remaining = protein.sequence.size();
          while (remaining != 0)
          {
            Size pick = rand() % remaining;
            shuffled += protein.sequence[pick];
            --remaining;
            protein.sequence[pick] = protein.sequence[remaining]; // overwrite consumed position with last position (about to go out of scope for next dice roll)
          }
          // The source buffer was used as scratch space above; the permutation
          // itself lives in 'shuffled' and has to replace the sequence.
          protein.sequence = shuffled;
        }
        else // reverse
        {
          protein.sequence.reverse();
        }
        //-------------------------------------------------------------
        // writing output
        //-------------------------------------------------------------
        f.writeNext(protein);

      } // next protein
    } // input files

    return EXECUTION_OK;
  }