Exemple #1
0
static void xml_outputPGSlines(GtArray *alignments, unsigned int indentlevel,
                               GtFile *outfp)
{
  unsigned long i, j;
  GthSA *sa;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<supporting_evidence xmlns=\""
                  "http://www.genomethreader.org/"
                  "GTH_output/PGL_module/predicted_gene_location/"
                  "AGS_information/supporting_evidence/\">\n");
  indentlevel++;

  for (i = 0; i < gt_array_size(alignments); i++) {
    sa = *(GthSA**) gt_array_get(alignments, i);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<PGS_line>\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<gDNA_exon_coordinates>\n");
    indentlevel++;

    for (j = 0; j < gth_sa_num_of_exons(sa); j++) {
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "<exon start=\"%lu\" stop=\"%lu\"/>\n",
                      gth_sa_left_genomic_exon_border(sa, j),
                      gth_sa_right_genomic_exon_border(sa, j));
    }
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</gDNA_exon_coordinates>\n");
    gth_indent(outfp, indentlevel);
    if (gth_sa_alphatype(sa) == DNA_ALPHA) {
      gt_file_xprintf(outfp, "<referenceDNA id=\"%s\" strand=\"%c\"/>\n",
                         gth_sa_ref_id(sa),
                         gth_sa_ref_strand_char(sa));
    }
    else {
      gt_file_xprintf(outfp, "<referenceProtein id=\"%s\"/>\n",
                         gth_sa_ref_id(sa));
    }
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</PGS_line>\n");
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</supporting_evidence>\n");
}
/*
  The following function prints the "classic" GeneSeqer2 PGS line
*/
static void showpgsline(GthSA *sa, GtFile *outfp)
{
  GtUword i, numofexons;
  gt_assert(sa);
  numofexons = gth_sa_num_of_exons(sa);
  gt_file_xprintf(outfp, "PGS_%s%c_%s%c\t(",
                     gth_sa_gen_id(sa),
                     gth_sa_gen_strand_char(sa),
                     gth_sa_ref_id(sa),
                     gth_sa_ref_strand_char(sa));

  for (i = 0; i < numofexons; i++) {
    gt_file_xprintf(outfp, ""GT_WU"  "GT_WU"",
                    gth_sa_left_genomic_exon_border(sa, i),
                    gth_sa_right_genomic_exon_border(sa, i));
    if (i == numofexons - 1)
      gt_file_xprintf(outfp, ")\n\n");
    else
      gt_file_xfputc(',', outfp);
  }
}
static void outputPGSlines(GtArray *alignments, GtFile *outfp)
{
  GtUword i, j;
  GthSA *sa;

  for (i = 0; i < gt_array_size(alignments); i++) {
    sa = *(GthSA**) gt_array_get(alignments, i);

    gt_file_xprintf(outfp, "  PGS (");
    for (j = 0; j < gth_sa_num_of_exons(sa); j++) {
      if (j > 0)
        gt_file_xfputc(',', outfp);
      gt_file_xprintf(outfp, GT_WU "  " GT_WU ,
                      gth_sa_left_genomic_exon_border(sa, j),
                      gth_sa_right_genomic_exon_border(sa, j));
    }
    gt_file_xprintf(outfp, ")\t%s%c\n", gth_sa_ref_id(sa),
                       gth_sa_ref_strand_char(sa));
  }

  gt_file_xfputc('\n', outfp);
}
Exemple #4
0
/*
  The following function prints the "classic" GeneSeqer2 PGS line
*/
static void xml_showpgsline(GthSA *sa, unsigned int indentlevel,
                            GtFile *outfp)
{
  unsigned long i, numofexons;
  gt_assert(sa);
  numofexons = gth_sa_num_of_exons(sa);
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<PGS_line>\n");
  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA gen_id=\"%s\" gen_strand=\"%c\"/>\n",
                     gth_sa_gen_id(sa), gth_sa_gen_strand_char(sa));
  gth_indent(outfp, indentlevel);
  if (gth_sa_alphatype(sa) == DNA_ALPHA) {
    gt_file_xprintf(outfp, "<rDNA rDNA_id=\"%s\" rDNA_strand=\"%c\"/>\n",
                       gth_sa_ref_id(sa), gth_sa_ref_strand_char(sa));
  }
  else {
    gt_file_xprintf(outfp, "<rProt rProt_id=\"%s\"/>\n",
                       gth_sa_ref_id(sa));
  }
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA_exon_coordinates>\n");
  indentlevel++;

  for (i = 0; i < numofexons; i++) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon e_start=\"%lu\" e_stop=\"%lu\"/>\n",
                    gth_sa_left_genomic_exon_border(sa, i),
                    gth_sa_right_genomic_exon_border(sa, i));
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</gDNA_exon_coordinates>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</PGS_line>\n");
}
Exemple #5
0
static void xml_showalignmentheader(GthSA *sa,
                                    unsigned long minintronlength,
                                    unsigned int indentlevel, GtFile *outfp)
{
  unsigned long i, leftreferenceexonborder, rightreferenceexonborder,
                referenceexonlength;
  GthDbl exonscore, donorsitescore, acceptorsitescore;
  GthFlt donorsiteprobability, acceptorsiteprobability;
  Exoninfo *exoninfo;
  Introninfo *introninfo;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<predicted_gene_structure>\n");
  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<exon-intron_info>\n");
  indentlevel++;

  for (i = 0; i < gth_sa_num_of_exons(sa); i++) {
    exoninfo = gth_sa_get_exon(sa, i);
    leftreferenceexonborder  = exoninfo->leftreferenceexonborder;
    rightreferenceexonborder = exoninfo->rightreferenceexonborder;
    referenceexonlength      = rightreferenceexonborder
                               - leftreferenceexonborder + 1;
    exonscore                = exoninfo->exonscore;

    if (i > 0) {
      introninfo = gth_sa_get_intron(sa, i-1);
      donorsiteprobability    = introninfo->donorsiteprobability;
      donorsitescore          = introninfo->donorsitescore;
      acceptorsiteprobability = introninfo->acceptorsiteprobability;
      acceptorsitescore       = introninfo->acceptorsitescore;

      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "<intron i_serial=\"%lu\">\n",
                      i - 1 + OUTPUTOFFSET);
      indentlevel++;
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp,
                      "<gDNA_intron_boundary i_start=\"%lu\" i_stop=\"%lu\" "
                      "i_length=\"%lu\">\n",
                      gth_sa_left_intron_border(sa, i-1),
                      gth_sa_right_intron_border(sa, i-1),
                      gth_sa_intron_length(sa, i-1));
      indentlevel++;
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "<donor d_prob=\"%.3f\"", donorsiteprobability);
      if (gth_sa_alphatype(sa) == DNA_ALPHA)
        gt_file_xprintf(outfp, " d_score=\"%.2f\"", donorsitescore);
      gt_file_xprintf(outfp, "/>\n");
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "<acceptor a_prob=\"%.3f\"",
                      acceptorsiteprobability);
      if (gth_sa_alphatype(sa) == DNA_ALPHA)
        gt_file_xprintf(outfp, " a_score=\"%.2f\"", acceptorsitescore);
      gt_file_xprintf(outfp, "/>\n");
      indentlevel--;
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "</gDNA_intron_boundary>\n");

      /* if the intron is shorter or equal than the minimal intron length an
         additional tag is shown */
      if (gth_sa_intron_length(sa, i-1) <= minintronlength) {
        gth_indent(outfp, indentlevel);
        gt_file_xprintf(outfp, "<shorter_than_min_intron_len/>\n");
      }

      indentlevel--;
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "</intron>\n");
    }

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon e_serial=\"%lu\">\n", i + OUTPUTOFFSET);
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<gDNA_exon_boundary g_start=\"%lu\" g_stop="
                    "\"%lu\" g_length=\"%lu\"/>\n",
                    gth_sa_left_genomic_exon_border(sa, i),
                    gth_sa_right_genomic_exon_border(sa, i),
                    gth_sa_genomic_exon_length(sa, i));
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<reference_exon_boundary r_type=\"%s\" r_start=\"%lu\" "
                    "r_stop=\"%lu\" r_length=\"%lu\" r_score=\"%5.3f\"/>\n",
                    gth_sa_alphastring(sa),
                    leftreferenceexonborder  + OUTPUTOFFSET ,
                    rightreferenceexonborder + OUTPUTOFFSET ,
                    referenceexonlength, exonscore);
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</exon>\n");
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</exon-intron_info>\n");

  /* showing PPA line (if an poly-A tail was determined) */
  if (gth_sa_alphatype(sa) == DNA_ALPHA)
    xml_showppaline(sa, indentlevel, outfp);

  /* showing MATCH line */
  xml_showmatchline(sa, indentlevel, outfp);

  /* showing PGS line */
  xml_showpgsline(sa, indentlevel, outfp);
}
static void showalignmentheader(GthSA *sa, bool gs2out, int widthforgenpos,
                                GtUword minintronlength, GtFile *outfp)
{
  GtUword i, leftreferenceexonborder, rightreferenceexonborder,
                referenceexonlength;
  GthDbl exonscore, donorsitescore, acceptorsitescore;
  GthFlt donorsiteprobability, acceptorsiteprobability;
  Exoninfo *exoninfo;
  Introninfo *introninfo;

  gt_file_xprintf(outfp, "Predicted gene structure");
  if (gs2out) {
    gt_file_xprintf(outfp, " (within gDNA segment "GT_WU" to "GT_WU"):\n",
                       gth_sa_gen_dp_start_show(sa),
                       gth_sa_gen_dp_end_show(sa));
  }
  else
    gt_file_xprintf(outfp, ":\n");
  gt_file_xfputc('\n', outfp);

  for (i = 0; i < gth_sa_num_of_exons(sa); i++) {
    exoninfo = gth_sa_get_exon(sa, i);
    leftreferenceexonborder  = exoninfo->leftreferenceexonborder;
    rightreferenceexonborder = exoninfo->rightreferenceexonborder;
    referenceexonlength      = rightreferenceexonborder
                               - leftreferenceexonborder + 1;
    exonscore                = exoninfo->exonscore;

    if (i > 0) {
      introninfo = gth_sa_get_intron(sa, i-1);
      donorsiteprobability    = introninfo->donorsiteprobability;
      donorsitescore          = introninfo->donorsitescore;
      acceptorsiteprobability = introninfo->acceptorsiteprobability;
      acceptorsitescore       = introninfo->acceptorsitescore;

      gt_file_xprintf(outfp, "  Intron %2" GT_WUS " %*" GT_WUS " %*" GT_WUS
                      " (%4" GT_WUS " n); ",
                      i - 1 + OUTPUTOFFSET, widthforgenpos,
                      gth_sa_left_intron_border(sa, i-1), widthforgenpos,
                      gth_sa_right_intron_border(sa, i-1),
                      gth_sa_intron_length(sa, i-1));

      gt_file_xprintf(outfp, "Pd: %5.3f ", donorsiteprobability);
      if (gth_sa_alphatype(sa) == DNA_ALPHA) {
        if (donorsitescore == 0.0)
          gt_file_xprintf(outfp, "(s:    0), ");
        else
          gt_file_xprintf(outfp, "(s: %4.2f), ", donorsitescore);
      }
      else
        gt_file_xprintf(outfp, "  ");
      gt_file_xprintf(outfp, "Pa: %5.3f ", acceptorsiteprobability);
      if (gth_sa_alphatype(sa) == DNA_ALPHA) {
        if (acceptorsitescore == 0.0)
          gt_file_xprintf(outfp, "(s:    0)");
        else
          gt_file_xprintf(outfp, "(s: %4.2f)", acceptorsitescore);
      }
      /* if the intron is shorter or equal than the minimum intron length two
         question marks are shown at the end of the line */
      if (gth_sa_intron_length(sa, i-1) <= minintronlength)
        gt_file_xprintf(outfp, " ??");
      gt_file_xfputc('\n', outfp);
    }

    gt_file_xprintf(outfp,
                    " Exon %2" GT_WUS " %*" GT_WUS " %*" GT_WUS " (%4" GT_WUS
                    " n);  %s %6" GT_WUS " %6" GT_WUS " (%4" GT_WUS " %s); "
                    "score: %5.3f\n", i + OUTPUTOFFSET, widthforgenpos,
                    gth_sa_left_genomic_exon_border(sa, i), widthforgenpos,
                    gth_sa_right_genomic_exon_border(sa, i),
                    gth_sa_genomic_exon_length(sa, i), gth_sa_alphastring(sa),
                    leftreferenceexonborder  + OUTPUTOFFSET,
                    rightreferenceexonborder + OUTPUTOFFSET,
                    referenceexonlength,
                    gth_sa_alphatype(sa) == DNA_ALPHA ? "n" : "aa", exonscore);
  }

  /* showing PPA line (if an poly-A tail was determined) */
  if (gth_sa_alphatype(sa) == DNA_ALPHA)
    showppaline(sa, outfp);
  gt_file_xfputc('\n', outfp);

  /* showing MATCH line */
  showmatchline(sa, outfp);

  /* showing PGS line */
  showpgsline(sa, outfp);
}