static bool cluster_is_consistent(GtArray *pgls)
{
  GtUword i, j, maxright = GT_UNDEF_UWORD, gen_file_num = GT_UNDEF_UWORD;
  GthPGL *pgl;
  bool strandsign = GT_UNDEF_BOOL;
  GthSA *sa;
  GtRange range;

  for (i = 0; i < gt_array_size(pgls); i++) {
    pgl = *(GthPGL**) gt_array_get(pgls, i);

    for (j = 0; j < gt_array_size(pgl->alignments); j++) {
      sa = *(GthSA**) gt_array_get(pgl->alignments, j);
      if (j == 0) {
        /* save genomic file number of this cluster */
        gen_file_num = gth_sa_gen_file_num(sa);

        /* save strand sign of this cluster */
        strandsign = gth_sa_gen_strand_forward(sa);

        /* set maxright to right border of first SA */
        range = gth_sa_range_forward(sa);
        maxright = range.end;
      }
      else {
        /* check if all genomic file numbers are the same */
        if (gth_sa_gen_file_num(sa) != gen_file_num)
          return false;

        /* check if all strand signs of this cluster are equal */
        if (gth_sa_gen_strand_forward(sa) != strandsign)
          return false;

        /* check for cluster condition */
        range = gth_sa_range_forward(sa);
        if (range.start > maxright)
          return false;
        if (range.end > maxright)
          maxright = range.end;
      }
    }
  }

  return true;
}
static void storeSAinnewPGL(GtArray *pgls, GtUword *currentPGLindex,
                            GthSA *sa)
{
  GthPGL *pgl;

  pgl = gth_pgl_new(gth_sa_gen_strand_forward(sa));
  pgl->maxrange.start = gth_sa_get_exon(sa, 0)->leftgenomicexonborder;
  pgl->maxrange.end = gth_sa_get_exon(sa,gth_sa_num_of_exons(sa)-1)
                      ->rightgenomicexonborder;

  gth_pgl_add_sa(pgl, sa);
  gt_array_add(pgls, pgl);

  /* set the current PGL index */
  *currentPGLindex = gt_array_size(pgls) - 1;
}
void gthclusterSAstoPGLs(GtArray *pgls, GthSACollection *sa_collection)
{
  GtUword forwardgen_file_num,   /* the genomic file number of the
                                            current forward cluster */
                forwardmaxright,         /* the maximal right position of the
                                            current forward cluster */
                forward_currentPGLindex, /* the current forward PGL index */
                reversegen_file_num,   /* the genomic file number of the
                                            current reverse cluster */
                reversemaxright,         /* the maximal right position of the
                                            current reverse cluster */
                reverse_currentPGLindex; /* the current reverse PGL index */
  GthSACollectionIterator *iterator;
  GthSA *sa;
  gt_assert(sa_collection);

  /* init */
  forwardgen_file_num   = GT_UNDEF_UWORD;
  forwardmaxright         = GT_UNDEF_UWORD;
  forward_currentPGLindex = GT_UNDEF_UWORD;
  reversegen_file_num   = GT_UNDEF_UWORD;
  reversemaxright         = GT_UNDEF_UWORD;
  reverse_currentPGLindex = GT_UNDEF_UWORD;

  /* cluster the SAs */
  iterator = gth_sa_collection_iterator_new(sa_collection);
  while ((sa = gth_sa_collection_iterator_next(iterator))) {
    if (gth_sa_gen_strand_forward(sa)) {
      saveSAtoPGLs(&forwardgen_file_num, &forwardmaxright,
                   &forward_currentPGLindex, pgls, sa);
    }
    else {
      saveSAtoPGLs(&reversegen_file_num, &reversemaxright,
                   &reverse_currentPGLindex, pgls, sa);
    }
  }
  gth_sa_collection_iterator_delete(iterator);

  /* cluster is consistent */
  gt_assert(cluster_is_consistent(pgls));
}
static void xml_inter_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  bool dnaalpha = true;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.GenomeThreader.org/"
                  "SplicedAlignment/spliced_alignment/\">\n");
  indentlevel++;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referencealphatype>");
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "DNA_ALPHA");
      break;
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "PROTEIN_ALPHA");
      dnaalpha = false;
      break;
    default: gt_assert(0);
  }
  gt_file_xprintf(outfp, "</referencealphatype>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<editoperations>\n");
  indentlevel++;
  gth_backtrace_path_show_complete(gth_sa_backtrace_path(sa), true, indentlevel,
                                   outfp);
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</editoperations>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<indelcount>"GT_WU"</indelcount>\n",
                     gth_sa_indelcount(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomiclengthDP>"GT_WU"</genomiclengthDP>\n",
                     gth_sa_gen_dp_length(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomiclengthtotal>"GT_WU"</genomiclengthtotal>\n",
                     gth_sa_gen_total_length(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicoffset>"GT_WU"</genomicoffset>\n",
                     gth_sa_gen_offset(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referencelength>"GT_WU"</referencelength>\n",
                     gth_sa_ref_total_length(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<dpstartpos>"GT_WU"</dpstartpos>\n",
                     gth_sa_gen_dp_start(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<dpendpos>"GT_WU"</dpendpos>\n",
                     gth_sa_gen_dp_end(sa));

  showgenomicfilename(sa, input, indentlevel, outfp);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicseqnum>"GT_WU"</genomicseqnum>\n",
                     gth_sa_gen_seq_num(sa));

  showreferencefilename(sa, input, indentlevel, outfp);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referenceseqnum>"GT_WU"</referenceseqnum>\n",
                     gth_sa_ref_seq_num(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicid>%s</genomicid>\n", gth_sa_gen_id(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referenceid>%s</referenceid>\n",
                  gth_sa_ref_id(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<genomicstrandisforward>%s</genomicstrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_gen_strand_forward(sa)));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                    "<referencestrandisforward>%s</referencestrandisforward>\n",
                    GTH_SHOWBOOL(gth_sa_ref_strand_forward(sa)));

  showalignmentcutoffs(sa, indentlevel, outfp);

  showexons(sa, indentlevel, outfp);

  showintrons(sa, dnaalpha, indentlevel, outfp);

  showpolyAtailpos(sa, indentlevel, outfp);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<alignmentscore>%.*f</alignmentscore>\n",
                  PRECISION, gth_sa_score(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<coverage>%.*f</coverage>\n", PRECISION,
                     gth_sa_coverage(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<coverageofgenomicsegmentishighest>%s"
                  "</coverageofgenomicsegmentishighest>\n",
                  GTH_SHOWBOOL(gth_sa_genomic_cov_is_highest(sa)));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<cumulativelengthofscoredexons>"GT_WU""
                     "</cumulativelengthofscoredexons>\n",
                     gth_sa_cumlen_scored_exons(sa));

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");
}
Beispiel #5
0
static void xml_final_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned long minintronlength,
                                             unsigned long translationtable,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  unsigned char *first_line, *second_line, *third_line;
  GT_UNUSED bool reverse_subject_pos = false;
  unsigned long cols;

  gt_assert(sa && input);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.genomethreader.org/"
                  "GTH_output/alignment_module/spliced_alignment/\">\n");
  indentlevel++;

  /* If the reverse complement of the genomic DNA is considered, this
     opition is needed for correct output of the genomic sequence positions
     by the function showalignmentgeneric() */
  if (!gth_sa_gen_strand_forward(sa))
    reverse_subject_pos = true;

  xml_showgthreferenceinformation(sa, input, indentlevel, outfp);

  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<seq>");
  gth_sa_echo_reference_sequence(sa, input, false, outfp);
  gt_file_xprintf(outfp, "</seq>\n");
  indentlevel--;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</reference>\n");
  xml_showgthgenomicinformation(sa, input, indentlevel, outfp);

  xml_showalignmentheader(sa, minintronlength, indentlevel, outfp);

  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<alignment>\n");

  /* compute the alignment lines */
  cols = gth_sa_get_alignment_lines(sa, &first_line, &second_line, &third_line,
                                    translationtable, input);

  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genome_strand>");
  showconcreteline(first_line, cols, outfp);
  gt_file_xprintf(outfp, "</genome_strand>\n");
  gth_indent(outfp, indentlevel);
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "<mrna_strand>");
      showconcreteline(second_line, cols, outfp);
      gt_file_xprintf(outfp, "</mrna_strand>\n");
      break;
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "<genomeProt>");
      showconcreteline(second_line, cols, outfp);
      gt_file_xprintf(outfp, "</genomeProt>\n");
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "<queryProt>");
      showconcreteline(third_line, cols, outfp);
      gt_file_xprintf(outfp, "</queryProt>\n");

      break;
    default: gt_assert(0);
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</alignment>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</predicted_gene_structure>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");

  /* free */
  gt_free(first_line);
  gt_free(second_line);
  gt_free(third_line);
}
Beispiel #6
0
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                         unsigned char **first_line,
                                         unsigned char **second_line,
                                         unsigned char **third_line,
                                         GtUword translationtable,
                                         GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
                referencestartcutoff, referenceendcutoff, referencetotalcutoff;
  GT_UNUSED bool reverse_subject_pos = false;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* sequences */
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GtUword cols = 0;
  GthSeqCon *ref_seq_con;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* If the reverse complement of the genomic DNA is considered, this
     opition is needed for correct output of the genomic sequence positions
     by the function showalignmentgeneric() */
  if (!gth_sa_gen_strand_forward(sa))
    reverse_subject_pos = true;

  /* get genomic sequence */
  gen_seq_orig = (unsigned char*)
    gth_input_original_genomic_sequence(input, gth_sa_gen_file_num(sa),
                                        gth_sa_gen_strand_forward(sa))
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig +
                                         genomicstartcutoff,
                                         gth_sa_gen_dp_length(sa) -
                                         genomictotalcutoff,
                                         ref_seq_orig +
                                         referencestartcutoff,
                                         gth_sa_ref_total_length(sa) -
                                         referencetotalcutoff,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig +
                                           genomicstartcutoff,
                                           gth_sa_gen_dp_length(sa) -
                                           genomictotalcutoff,
                                           ref_seq_orig +
                                           referencestartcutoff,
                                           gth_sa_ref_total_length(sa) -
                                           referencetotalcutoff,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}
Beispiel #7
0
void gth_sa_echo_alignment(const GthSA *sa, GtUword showintronmaxlen,
                           GtUword translationtable,
                           bool wildcardimplosion, GthInput *input,
                           GtFile *outfp)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
                referencestartcutoff, referenceendcutoff, referencetotalcutoff;
  bool reverse_subject_pos = false;
  const unsigned char *gen_seq_orig, *ref_seq_orig;
  GthSeqCon *ref_seq_con;
  GtAlphabet *ref_alphabet;

  gt_assert(sa && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);
  ref_alphabet = gth_input_current_ref_alphabet(input);

  /* If the reverse complement of the genomic DNA is considered, this
     opition is needed for correct output of the genomic sequence positions
     by the function showalignmentgeneric() */
  if (!gth_sa_gen_strand_forward(sa))
    reverse_subject_pos = true;

  /* get genomic sequence */
  gen_seq_orig =
    gth_input_original_genomic_sequence(input, sa->gen_file_num,
                                        sa->gen_strand_forward)
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gthshowalignmentdna(outfp,ALIGNMENTLINEWIDTH,
                          gth_sa_get_editoperations(sa),
                          gth_sa_get_editoperations_length(sa),
                          gth_sa_indelcount(sa),
                          gen_seq_orig + genomicstartcutoff,
                          gth_sa_gen_dp_length(sa) - genomictotalcutoff,
                          ref_seq_orig + referencestartcutoff,
                          gth_sa_ref_total_length(sa) -
                          referencetotalcutoff,
                          gth_sa_gen_dp_start(sa) + genomicstartcutoff -
                          gth_sa_gen_offset(sa), referencestartcutoff,
                          gth_sa_gen_total_length(sa), showintronmaxlen,
                          ref_alphabet, reverse_subject_pos,
                          wildcardimplosion);
      break;
    case PROTEIN_ALPHA:
      gthshowalignmentprotein(outfp, ALIGNMENTLINEWIDTH,
                              gth_sa_get_editoperations(sa),
                              gth_sa_get_editoperations_length(sa),
                              gth_sa_indelcount(sa),
                              gen_seq_orig + genomicstartcutoff,
                              gth_sa_gen_dp_length(sa) - genomictotalcutoff,
                              ref_seq_orig + referencestartcutoff,
                              gth_sa_ref_total_length(sa) -
                              referencetotalcutoff,
                              gth_sa_gen_dp_start(sa) + genomicstartcutoff -
                              gth_sa_gen_offset(sa), referencestartcutoff,
                              gth_sa_gen_total_length(sa), showintronmaxlen,
                              ref_alphabet, translationtable,
                              gth_input_score_matrix(input),
                              gth_input_score_matrix_alpha(input),
                              reverse_subject_pos, wildcardimplosion);
      break;
    default: gt_assert(0);
  }
}