Ejemplo n.º 1
0
static void xml_showgthreferenceinformation(GthSA *sa,
                                            GthInput *input,
                                            unsigned int indentlevel,
                                            GtFile *outfp)
{
  gt_assert(gth_sa_ref_file_num(sa) != GT_UNDEF_ULONG);

  gth_indent(outfp, indentlevel);

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "<reference ref_file=\"%s\" ref_id=\"%s\" "
                                "ref_strand=\"%c\" ref_description=\"",
                         gth_input_get_reference_filename(input,
                                                  gth_sa_ref_file_num(sa)),
                         gth_sa_ref_id(sa),
                         gth_sa_ref_strand_char(sa));
      break;
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "<reference ref_file=\"%s\" ref_id=\"%s\" "
                                "ref_description=\"",
                         gth_input_get_reference_filename(input,
                                                  gth_sa_ref_file_num(sa)),
                         gth_sa_ref_id(sa));
      break;
    default: gt_assert(0);
  }

  gth_input_echo_reference_description(input, gth_sa_ref_file_num(sa),
                                       gth_sa_ref_seq_num(sa), outfp);

  gt_file_xprintf(outfp, "\">\n");
}
Ejemplo n.º 2
0
static void showgthreferenceinformation(GthSA *sa, GthInput *input,
                                        bool showseqnums,
                                        GtFile *outfp)
{
  gt_assert(gth_sa_ref_file_num(sa) != GT_UNDEF_UWORD);

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp,
                         "EST Sequence: file=%s, strand=%c, description=",
                         gth_input_get_reference_filename(input,
                                                  gth_sa_ref_file_num(sa)),
                         gth_sa_ref_strand_char(sa));
      break;
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "Protein Sequence: file=%s, description=",
                         gth_input_get_reference_filename(input,
                                                 gth_sa_ref_file_num(sa)));
      break;
    default: gt_assert(0);
  }

  gth_sa_echo_reference_description(sa, input, outfp);

  if (showseqnums)
    gt_file_xprintf(outfp, ", seqnum="GT_WU"",  gth_sa_ref_seq_num(sa));

  gt_file_xfputc('\n', outfp);
  gt_file_xfputc('\n', outfp);
}
static void xml_inter_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  bool dnaalpha = true;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.GenomeThreader.org/"
                  "SplicedAlignment/spliced_alignment/\">\n");
  indentlevel++;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referencealphatype>");
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "DNA_ALPHA");
      break;
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "PROTEIN_ALPHA");
      dnaalpha = false;
      break;
    default: gt_assert(0);
  }
  gt_file_xprintf(outfp, "</referencealphatype>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<editoperations>\n");
  indentlevel++;
  gth_backtrace_path_show_complete(gth_sa_backtrace_path(sa), true, indentlevel,
                                   outfp);
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</editoperations>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<indelcount>"GT_WU"</indelcount>\n",
                     gth_sa_indelcount(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomiclengthDP>"GT_WU"</genomiclengthDP>\n",
                     gth_sa_gen_dp_length(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomiclengthtotal>"GT_WU"</genomiclengthtotal>\n",
                     gth_sa_gen_total_length(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicoffset>"GT_WU"</genomicoffset>\n",
                     gth_sa_gen_offset(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referencelength>"GT_WU"</referencelength>\n",
                     gth_sa_ref_total_length(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<dpstartpos>"GT_WU"</dpstartpos>\n",
                     gth_sa_gen_dp_start(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<dpendpos>"GT_WU"</dpendpos>\n",
                     gth_sa_gen_dp_end(sa));

  showgenomicfilename(sa, input, indentlevel, outfp);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicseqnum>"GT_WU"</genomicseqnum>\n",
                     gth_sa_gen_seq_num(sa));

  showreferencefilename(sa, input, indentlevel, outfp);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referenceseqnum>"GT_WU"</referenceseqnum>\n",
                     gth_sa_ref_seq_num(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicid>%s</genomicid>\n", gth_sa_gen_id(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<referenceid>%s</referenceid>\n",
                  gth_sa_ref_id(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<genomicstrandisforward>%s</genomicstrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_gen_strand_forward(sa)));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                    "<referencestrandisforward>%s</referencestrandisforward>\n",
                    GTH_SHOWBOOL(gth_sa_ref_strand_forward(sa)));

  showalignmentcutoffs(sa, indentlevel, outfp);

  showexons(sa, indentlevel, outfp);

  showintrons(sa, dnaalpha, indentlevel, outfp);

  showpolyAtailpos(sa, indentlevel, outfp);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<alignmentscore>%.*f</alignmentscore>\n",
                  PRECISION, gth_sa_score(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<coverage>%.*f</coverage>\n", PRECISION,
                     gth_sa_coverage(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<coverageofgenomicsegmentishighest>%s"
                  "</coverageofgenomicsegmentishighest>\n",
                  GTH_SHOWBOOL(gth_sa_genomic_cov_is_highest(sa)));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<cumulativelengthofscoredexons>"GT_WU""
                     "</cumulativelengthofscoredexons>\n",
                     gth_sa_cumlen_scored_exons(sa));

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");
}
Ejemplo n.º 4
0
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                         unsigned char **first_line,
                                         unsigned char **second_line,
                                         unsigned char **third_line,
                                         GtUword translationtable,
                                         GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
                referencestartcutoff, referenceendcutoff, referencetotalcutoff;
  GT_UNUSED bool reverse_subject_pos = false;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* sequences */
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GtUword cols = 0;
  GthSeqCon *ref_seq_con;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* If the reverse complement of the genomic DNA is considered, this
     opition is needed for correct output of the genomic sequence positions
     by the function showalignmentgeneric() */
  if (!gth_sa_gen_strand_forward(sa))
    reverse_subject_pos = true;

  /* get genomic sequence */
  gen_seq_orig = (unsigned char*)
    gth_input_original_genomic_sequence(input, gth_sa_gen_file_num(sa),
                                        gth_sa_gen_strand_forward(sa))
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig +
                                         genomicstartcutoff,
                                         gth_sa_gen_dp_length(sa) -
                                         genomictotalcutoff,
                                         ref_seq_orig +
                                         referencestartcutoff,
                                         gth_sa_ref_total_length(sa) -
                                         referencetotalcutoff,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig +
                                           genomicstartcutoff,
                                           gth_sa_gen_dp_length(sa) -
                                           genomictotalcutoff,
                                           ref_seq_orig +
                                           referencestartcutoff,
                                           gth_sa_ref_total_length(sa) -
                                           referencetotalcutoff,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}
Ejemplo n.º 5
0
void gth_sa_echo_alignment(const GthSA *sa, GtUword showintronmaxlen,
                           GtUword translationtable,
                           bool wildcardimplosion, GthInput *input,
                           GtFile *outfp)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
                referencestartcutoff, referenceendcutoff, referencetotalcutoff;
  bool reverse_subject_pos = false;
  const unsigned char *gen_seq_orig, *ref_seq_orig;
  GthSeqCon *ref_seq_con;
  GtAlphabet *ref_alphabet;

  gt_assert(sa && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);
  ref_alphabet = gth_input_current_ref_alphabet(input);

  /* If the reverse complement of the genomic DNA is considered, this
     opition is needed for correct output of the genomic sequence positions
     by the function showalignmentgeneric() */
  if (!gth_sa_gen_strand_forward(sa))
    reverse_subject_pos = true;

  /* get genomic sequence */
  gen_seq_orig =
    gth_input_original_genomic_sequence(input, sa->gen_file_num,
                                        sa->gen_strand_forward)
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gthshowalignmentdna(outfp,ALIGNMENTLINEWIDTH,
                          gth_sa_get_editoperations(sa),
                          gth_sa_get_editoperations_length(sa),
                          gth_sa_indelcount(sa),
                          gen_seq_orig + genomicstartcutoff,
                          gth_sa_gen_dp_length(sa) - genomictotalcutoff,
                          ref_seq_orig + referencestartcutoff,
                          gth_sa_ref_total_length(sa) -
                          referencetotalcutoff,
                          gth_sa_gen_dp_start(sa) + genomicstartcutoff -
                          gth_sa_gen_offset(sa), referencestartcutoff,
                          gth_sa_gen_total_length(sa), showintronmaxlen,
                          ref_alphabet, reverse_subject_pos,
                          wildcardimplosion);
      break;
    case PROTEIN_ALPHA:
      gthshowalignmentprotein(outfp, ALIGNMENTLINEWIDTH,
                              gth_sa_get_editoperations(sa),
                              gth_sa_get_editoperations_length(sa),
                              gth_sa_indelcount(sa),
                              gen_seq_orig + genomicstartcutoff,
                              gth_sa_gen_dp_length(sa) - genomictotalcutoff,
                              ref_seq_orig + referencestartcutoff,
                              gth_sa_ref_total_length(sa) -
                              referencetotalcutoff,
                              gth_sa_gen_dp_start(sa) + genomicstartcutoff -
                              gth_sa_gen_offset(sa), referencestartcutoff,
                              gth_sa_gen_total_length(sa), showintronmaxlen,
                              ref_alphabet, translationtable,
                              gth_input_score_matrix(input),
                              gth_input_score_matrix_alpha(input),
                              reverse_subject_pos, wildcardimplosion);
      break;
    default: gt_assert(0);
  }
}