/* Show the predicted gene location <pgl> with number <pglnum> on out->outfp:
   as XML elements if out->xmlout is set, as a plain text line otherwise.
   Afterwards every entry of pgl->assemblies is handed to show_ags(), together
   with <translationtable> and <input>. <indentlevel> is the indentation used
   for the XML output. GFF3 output is not handled here (asserted). */
static void show_pgl(GthPGL *pgl, GtUword pglnum,
                     GtUword translationtable, GthInput *input,
                     unsigned int indentlevel, GthOutput *out)
{
  GtFile *outfp = out->outfp;
  GtUword agsnum, startpos, stoppos;
  bool forward;
  char strandchar;

  gt_assert(!out->gff3out);

  /* strand character and displayed positions are shared by both formats */
  forward = gth_pgl_is_forward(pgl);
  strandchar = SHOWSTRAND(forward);
  startpos = SHOWGENPOS(forward, gth_pgl_total_length(pgl),
                        gth_pgl_genomic_offset(pgl), pgl->maxrange.start);
  stoppos = SHOWGENPOS(forward, gth_pgl_total_length(pgl),
                       gth_pgl_genomic_offset(pgl), pgl->maxrange.end);

  if (!out->xmlout) {
    /* plain text: one header line, optionally naming the genomic template */
    gt_file_xprintf(outfp, "PGL %3" GT_WUS " (%c strand):      " GT_WU "     "
                    GT_WU, pglnum + OUTPUTOFFSET, strandchar, startpos,
                    stoppos);
    if (out->pglgentemplate) {
      gt_file_xprintf(outfp, " (genomic template '%s')", gth_pgl_gen_id(pgl));
    }
    gt_file_xfputc('\n', outfp);
  }
  else {
    /* XML: open the enclosing element, nested content is indented deeper */
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<predicted_gene_location>\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<PGL_line PGL_serial=\"" GT_WU "\" "
                    "PGL_strand=\"%c\" PGL_start=\"" GT_WU "\" PGL_stop=\""
                    GT_WU "\"/>\n", pglnum + OUTPUTOFFSET, strandchar,
                    startpos, stoppos);
  }

  /* show all alternative gene structures of this PGL */
  agsnum = 0;
  while (agsnum < gt_array_size(pgl->assemblies)) {
    show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum, translationtable,
             input, indentlevel, out);
    agsnum++;
  }

  if (out->xmlout) {
    /* close the enclosing element on the original indentation level */
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</predicted_gene_location>\n");
  }
}
Esempio n. 2
0
/* Write the predicted gene location <pgl> with number <pglnum> as a
   <predicted_gene_location> XML element to out->outfp. The element contains
   one <PGL_line> with serial, strand, start and stop, followed by the output
   of xml_show_ags() for each AGS of the PGL. <indentlevel> is the indentation
   of the enclosing element; nested content is indented one level deeper. */
static void xml_show_pgl(GthPGL *pgl, unsigned long pglnum,
                         unsigned long translationtable, GthInput *input,
                         unsigned int indentlevel, GthOutput *out)
{
  const unsigned int inner_indent = indentlevel + 1;
  unsigned long agsnum, startpos, stoppos;
  bool forward;
  char strandchar;

  /* opening tag */
  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "<predicted_gene_location>\n");

  /* strand and displayed positions of the PGL */
  forward = gth_pgl_is_forward(pgl);
  strandchar = SHOWSTRAND(forward);
  startpos = SHOWGENPOS(forward, gth_pgl_total_length(pgl),
                        gth_pgl_genomic_offset(pgl), pgl->maxrange.start);
  stoppos = SHOWGENPOS(forward, gth_pgl_total_length(pgl),
                       gth_pgl_genomic_offset(pgl), pgl->maxrange.end);

  gth_indent(out->outfp, inner_indent);
  gt_file_xprintf(out->outfp,
                  "<PGL_line PGL_serial=\"%lu\" PGL_strand=\"%c\" "
                  "PGL_start=\"%lu\" PGL_stop=\"%lu\"/>\n",
                  pglnum + OUTPUTOFFSET, strandchar, startpos, stoppos);

  /* nested AGS elements */
  agsnum = 0;
  while (agsnum < gth_pgl_num_of_ags(pgl)) {
    xml_show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum,
                 translationtable, input, inner_indent, out);
    agsnum++;
  }

  /* closing tag */
  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "</predicted_gene_location>\n");
}
Esempio n. 3
0
/* Format a status message about the chain computation for the given bucket
   and pass it to <showverbose>. If more than one genomic or reference file
   is involved, the message additionally contains the file counters (shown
   1-based via "+ 1"). If <verboseseqs> is set, a second message with
   <genseqnum> and <refseqnum> is emitted. <numofchains> must be positive. */
static void show_chain_calc_status(GthShowVerbose showverbose,
                                   GtUword chainnum,
                                   GtUword numofchains,
                                   GtUword numofmatches,
                                   GtUword currentgen_file_num,
                                   GtUword numofgenomicfiles,
                                   GtUword currentreffilenum,
                                   GtUword numofreffiles,
                                   bool directmatches, bool verboseseqs,
                                   GtUword genseqnum,
                                   GtUword refseqnum)
{
  char buf[SHOW_CHAIN_CALCULATION_STATUS_BUF_SIZE];
  GT_UNUSED int rval;

  gt_assert(numofchains > 0);

  if (numofgenomicfiles != 1 || numofreffiles != 1) {
    /* several files: include genomic (gf) and reference (rf) file counters */
    rval = snprintf(buf, sizeof (buf),
                    "gf=" GT_WU "/" GT_WU ", d=%c, rf=" GT_WU "/" GT_WU
                    ", compute chains for bucket " GT_WU "/" GT_WU
                    " (matches in bucket=" GT_WU ")",
                    currentgen_file_num + 1, numofgenomicfiles,
                    SHOWSTRAND(directmatches),
                    currentreffilenum + 1, numofreffiles,
                    chainnum, numofchains, numofmatches);
  }
  else {
    /* exactly one genomic and one reference file: short form */
    rval = snprintf(buf, sizeof (buf),
                    "d=%c, compute chains for bucket " GT_WU "/" GT_WU
                    " (matches in bucket=" GT_WU ")",
                    SHOWSTRAND(directmatches),
                    chainnum, numofchains, numofmatches);
  }
  /* the message is expected to fit into buf without truncation */
  gt_assert(rval < SHOW_CHAIN_CALCULATION_STATUS_BUF_SIZE);
  showverbose(buf);

  if (!verboseseqs)
    return;

  /* additional message with the sequence numbers */
  rval = snprintf(buf, sizeof (buf),
                  "genseqnum=" GT_WU ", refseqnum=" GT_WU,
                  genseqnum, refseqnum);
  /* the message is expected to fit into buf without truncation */
  gt_assert(rval < SHOW_CHAIN_CALCULATION_STATUS_BUF_SIZE);
  showverbose(buf);
}
/* Format a status message about the spliced alignment computation and pass
   it to <showverbose>. The message contains the match direction, the strands
   of genomic and reference sequence, and the chain counter (shown 1-based via
   "+ 1"). It contains the file counters only if more than one genomic or
   reference file is involved, and ", icdelta=<icdelta>" only if
   <introncutout> is set. If <verboseseqs> is set, a second message with
   <gen_id> and <ref_id> is emitted. */
static void show_matrix_calculation_status(GthShowVerbose showverbose,
                                           bool gen_strand_forward,
                                           bool ref_strand_forward,
                                           bool introncutout,
                                           GtUword chainctr,
                                           GtUword num_of_chains,
                                           GtUword icdelta,
                                           GtUword gen_file_num,
                                           GtUword num_of_gen_files,
                                           GtUword ref_file_num,
                                           GtUword num_of_ref_files,
                                           bool directmatches,
                                           bool verboseseqs,
                                           const char *gen_id,
                                           const char *ref_id)
{
  char buf[SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE],
       icdeltastring[ICDELTASTRINGLENGTH];
  const char *icdelta_suffix = "";
  GT_UNUSED int rval;

  if (introncutout) {
    /* icdeltastring is only filled (and only used) with intron cutout */
    rval = snprintf(icdeltastring, sizeof (icdeltastring),
                    ", icdelta=" GT_WU, icdelta);
    /* the suffix is expected to fit into icdeltastring */
    gt_assert(rval <  ICDELTASTRINGLENGTH);
    icdelta_suffix = icdeltastring;
  }

  if (num_of_gen_files != 1 || num_of_ref_files != 1) {
    /* several files: include genomic (gf) and reference (rf) file counters */
    rval = snprintf(buf, sizeof (buf),
                    "gf=" GT_WU "/" GT_WU ", d=%c, rf=" GT_WU "/" GT_WU
                    ", compute spliced alignment, gs=%c, chain=" GT_WU "/"
                    GT_WU ", rs=%c%s",
                    gen_file_num + 1, num_of_gen_files,
                    SHOWSTRAND(directmatches),
                    ref_file_num + 1, num_of_ref_files,
                    SHOWSTRAND(gen_strand_forward),
                    chainctr + 1, num_of_chains,
                    SHOWSTRAND(ref_strand_forward),
                    icdelta_suffix);
  }
  else {
    /* exactly one genomic and one reference file: short form */
    rval = snprintf(buf, sizeof (buf),
                    "d=%c, compute spliced alignment, genseq=%c, "
                    "chain=" GT_WU "/" GT_WU ", refseq=%c%s",
                    SHOWSTRAND(directmatches),
                    SHOWSTRAND(gen_strand_forward),
                    chainctr + 1, num_of_chains,
                    SHOWSTRAND(ref_strand_forward),
                    icdelta_suffix);
  }
  /* the message is expected to fit into buf without truncation */
  gt_assert(rval <  SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE);
  showverbose(buf);

  if (!verboseseqs)
    return;

  /* additional message with the sequence identifiers */
  rval = snprintf(buf, sizeof (buf),
                  "genomicid=%s, referenceid=%s", gen_id, ref_id);
  /* the message is expected to fit into buf without truncation */
  gt_assert(rval < SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE);
  showverbose(buf);
}
Esempio n. 5
0
/* Process the global chain <chain> over <fragments>; <data> must point to a
   GthSaveChainInfo. A new GthChain is created and, if the chain passes
   globalchainislongenough(), filled as follows: the second-sequence ranges
   (startpos2/endpos2) of the chain fragments are stored as forward ranges
   (overlapping neighbours are merged), optionally enriched, mirrored to the
   opposite strand, optionally equipped with jump tables, and finally added
   to info->chain_collection. Chains failing the filter are deleted again.
   Progress is reported on info->outfp if info->comments is set.
   <max_gap_width> is only used in an assertion. */
void gth_save_chain(GtChain *chain, GtFragment *fragments,
                    unsigned long num_of_fragments,
                    GT_UNUSED unsigned long max_gap_width,
                    void *data)
{
  GthSaveChainInfo *info = (GthSaveChainInfo*) data;
  GthChain *gth_chain;
  GtRange exonrange, genomicrange;
  unsigned long idx;

  gt_assert(chain_is_colinear(chain, fragments));

  if (info->comments) {
    gt_file_xprintf(info->outfp, "%c process global chain with score %ld\n",
                    COMMENTCHAR, gt_chain_get_score(chain));
    gt_file_xprintf(info->outfp, "%c process global chain with the "
                    "following fragments\n", COMMENTCHAR);
    for (idx = 0; idx < gt_chain_size(chain); idx++) {
      showfragment(fragments + gt_chain_get_fragnum(chain, idx),
                   info->outfp);
    }
  }

  /* create the new chain and copy file and sequence numbers into it */
  gth_chain = gth_chain_new();
  gth_chain->gen_file_num = info->gen_file_num;
  gth_chain->gen_seq_num  = info->gen_seq_num;
  gth_chain->ref_file_num = info->ref_file_num;
  gth_chain->ref_seq_num  = info->ref_seq_num;

  /* a chain consists of at least one fragment */
  gt_assert(gt_chain_size(chain));

  /* global chain filter: drop the chain if it does not pass */
  if (!globalchainislongenough(chain, fragments,
                               &gth_chain->refseqcoverage,
                               info->gcmincoverage, info->referencelength,
                               info->stat, info->comments, info->outfp)) {
    /* with -paralogs a chain is never expected to be discarded */
    gt_assert(!info->paralogs);
    if (info->comments) {
      gt_file_xprintf(info->outfp, "%c global chain discarded\n",
                      COMMENTCHAR);
    }
    gth_chain_delete(gth_chain);
    return;
  }

  /* store the potential exons as forward ranges */
  for (idx = 0; idx < gt_chain_size(chain); idx++) {
    const GtFragment *fragment = fragments + gt_chain_get_fragnum(chain, idx);
    GtRange *lastrange = NULL;

    exonrange.start = fragment->startpos2;
    exonrange.end = fragment->endpos2;

    /* every fragment but the first has a previously stored range */
    if (idx > 0)
      lastrange = gt_array_get_last(gth_chain->forwardranges);

    if (idx > 0 && exonrange.start <= lastrange->end) {
      /* overlap with the previous range -> extend it instead of adding */
      gt_assert(lastrange->end <= exonrange.end);
      lastrange->end = exonrange.end;
      continue;
    }

#ifndef NDEBUG
    if (idx > 0) {
      /* the gap to the previous range must not exceed the maximum width */
      gt_assert((exonrange.start - 1 - lastrange->end + 1 - 1)
                <= max_gap_width);
    }
#endif
    gt_array_add(gth_chain->forwardranges, exonrange);
  }

  /* taken before the optional enrichment below modifies the chain */
  genomicrange = chain_get_genomicrange(gth_chain);

  if (info->enrichchains) {
    enrich_chain(gth_chain, fragments, num_of_fragments, info->comments,
                 info->outfp);
  }

  gt_assert(gt_ranges_are_consecutive(gth_chain->forwardranges));

  /* derive the ranges on the opposite strand */
  gt_ranges_copy_to_opposite_strand(gth_chain->reverseranges,
                                    gth_chain->forwardranges,
                                    info->gen_total_length,
                                    info->gen_offset);

  /* build forward and reverse jump tables if requested */
  if (info->jump_table) {
    GtArray *chain_fragments;
    GthJumpTable *fwd_table, *rev_table;

    chain_fragments = make_list_of_chain_fragments(chain, fragments,
                                                   num_of_fragments,
                                                   info->enrichchains,
                                                   &genomicrange);
    fwd_table = info->jump_table_new(gt_array_get_space(chain_fragments),
                                     gt_array_size(chain_fragments),
                                     info->jtdebug);
    rev_table = info->jump_table_new_reverse(fwd_table,
                                             info->gen_total_length,
                                             info->gen_offset,
                                             info->ref_total_length,
                                             info->ref_offset);
    gt_assert(!gth_chain->forward_jump_table);
    gth_chain->forward_jump_table = fwd_table;
    gt_assert(!gth_chain->reverse_jump_table);
    gth_chain->reverse_jump_table = rev_table;
    gt_array_delete(chain_fragments);
    gth_chain->jump_table_delete = info->jump_table_delete;
  }

  /* hand the finished chain over to the collection */
  gth_chain_collection_add(info->chain_collection, gth_chain);

  if (info->comments) {
    gt_file_xprintf(info->outfp, "%c global chain with the following "
                    "ranges has been saved\n", COMMENTCHAR);
    gt_file_xprintf(info->outfp, "%c forward ranges:\n", COMMENTCHAR);
    gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(gth_chain->forwardranges, info->outfp);
    gt_file_xprintf(info->outfp, "%c reverse ranges:\n", COMMENTCHAR);
    gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
    gt_ranges_show(gth_chain->reverseranges, info->outfp);
  }

  /* show the stored chain if the run stops after the chaining phase
     (Mohamed needed this to compare the chaining phase of gth with CHAINER)
   */
  if (info->stopafterchaining) {
    gt_file_xprintf(info->outfp,
                    "%c gl. chain with coverage=%.2f and score %ld "
                    "(genseq=%lu, str.=%c, refseq=%lu)\n", COMMENTCHAR,
                    gth_chain->refseqcoverage, gt_chain_get_score(chain),
                    gth_chain->gen_seq_num, SHOWSTRAND(info->directmatches),
                    gth_chain->ref_seq_num);

    for (idx = 0; idx < gt_chain_size(chain); idx++) {
      showfragment(fragments + gt_chain_get_fragnum(chain, idx),
                   info->outfp);
    }
  }
}
Esempio n. 6
0
/* Return the strand character of the reference sequence of <sa>, i.e. the
   result of SHOWSTRAND() for sa->ref_strand_forward. <sa> must not be NULL. */
char gth_sa_ref_strand_char(const GthSA *sa)
{
  char strandchar;

  gt_assert(sa);
  strandchar = SHOWSTRAND(sa->ref_strand_forward);
  return strandchar;
}
Esempio n. 7
0
/* Show the 3-phase translation and the ORFs of the alternative gene
   structure <ags> (number <agsnum> of PGL <pglnum>) on out->outfp. The exon
   ranges of <ags> are spliced out of the original genomic sequence taken
   from <input>, the spliced sequence is translated in all three frames with
   translation table <translationtable>, and the result is passed to
   showtranslation() and gthshowORFs(). With out->xmlout set, the output is
   wrapped in a <three_phase_translation> element indented by
   <indentlevel>. */
void gt_outputtranslationandorf(unsigned long pglnum, const GthAGS *ags,
                                unsigned long agsnum,
                                unsigned long translationtable,
                                GthInput *input,
                                unsigned int indentlevel,
                                GthOutput *out)
{
  GtFile *outfp = out->outfp;
  const bool forward = gth_ags_is_forward(ags);
  const unsigned char *gen_seq_orig;
  unsigned long exonnum;
  unsigned int nframe, f;
  char translated;
  GtStr *frame[3];
  GtArray *ranges;
  GthSplicedSeq *spliced_seq;
  GtCodonIterator *ci;
  GtTransTable *transtable;
  GtTranslator *translator;
  GtTranslatorStatus status;

  /* header */
  if (!out->xmlout) {
    gt_file_xprintf(outfp, "3-phase translation of AGS-%lu (%cstrand):\n\n",
                    agsnum + OUTPUTOFFSET, SHOWSTRAND(forward));
  }
  else {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<three_phase_translation "
                    "xmlns=\"http://www.genomethreader.org/GTH_output/"
                    "PGL_module/predicted_gene_location/AGS_information/"
                    "three_phase_translation/\">\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<description PGL_serial=\"%lu\" "
                    "AGS_serial=\"%lu\" gDNA_strand=\"%c\"/>\n",
                    pglnum + OUTPUTOFFSET, agsnum + OUTPUTOFFSET,
                    SHOWSTRAND(forward));
  }

  /* collect the exon ranges of the AGS */
  ranges = gt_array_new(sizeof (GtRange));
  for (exonnum = 0; exonnum < gt_array_size(ags->exons); exonnum++) {
    GthExonAGS *exon = gt_array_get(ags->exons, exonnum);
    gt_array_add(ranges, exon->range);
  }

  /* splice the exons out of the original genomic sequence */
  gen_seq_orig = gth_input_original_genomic_sequence(input,
                                                     gth_ags_filenum(ags),
                                                     forward);
  spliced_seq = gth_spliced_seq_new(gen_seq_orig, ranges);

  /* one string per reading frame */
  for (f = 0; f < 3; f++)
    frame[f] = gt_str_new();

  /* set up the translation of the spliced sequence */
  ci = gt_codon_iterator_simple_new((const char*) spliced_seq->splicedseq,
                                    spliced_seq->splicedseqlen, NULL);
  gt_assert(ci);
  transtable = gt_trans_table_new(translationtable, NULL);
  gt_assert(transtable);
  translator = gt_translator_new_with_table(transtable, ci);

  /* translate in all three frames; each result goes to its frame string */
  for (status = gt_translator_next(translator, &translated, &nframe, NULL);
       status == GT_TRANSLATOR_OK;
       status = gt_translator_next(translator, &translated, &nframe, NULL)) {
    gt_str_append_char(frame[nframe], translated);
  }
  gt_assert(status != GT_TRANSLATOR_ERROR);

  gt_translator_delete(translator);
  gt_trans_table_delete(transtable);
  gt_codon_iterator_delete(ci);

  /* show the translation */
  showtranslation(spliced_seq, gt_str_get(frame[0]), gt_str_get(frame[1]),
                  gt_str_get(frame[2]), ags->exons, forward,
                  gth_ags_total_length(ags), gth_ags_genomic_offset(ags),
                  indentlevel, out);

  /* show the (consolidated) ORFs */
  gthshowORFs(gt_str_get(frame[0]), gt_str_get(frame[1]),
              gt_str_get(frame[2]), gt_str_length(frame[0]),
              gt_str_length(frame[1]), gt_str_length(frame[2]), forward,
              gth_ags_total_length(ags), gth_ags_genomic_offset(ags),
              gt_str_get(ags->gen_id), pglnum, agsnum, spliced_seq,
              indentlevel, out);

  /* footer */
  if (out->xmlout) {
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</three_phase_translation>\n");
  }

  /* free everything allocated above */
  gth_spliced_seq_delete(spliced_seq);
  gt_array_delete(ranges);
  for (f = 0; f < 3; f++)
    gt_str_delete(frame[f]);
}