Пример #1
0
/* Print the spliced alignment <sa> to <outfp>. The genomic and reference
   sequences are fetched via <input>; depending on the alphabet type of <sa>
   the DNA or the protein display routine is used. <showintronmaxlen>,
   <translationtable> (protein case only) and <wildcardimplosion> are handed
   through to the display routine unchanged. */
void gth_sa_echo_alignment(const GthSA *sa, GtUword showintronmaxlen,
                           GtUword translationtable,
                           bool wildcardimplosion, GthInput *input,
                           GtFile *outfp)
{
  const unsigned char *genomic_seq, *reference_seq,
                      *genomic_shown, *reference_shown;
  GtUword gen_cut_start, gen_cut_total, ref_cut_start, ref_cut_total;
  GthSeqCon *seq_con;
  GtAlphabet *alphabet;
  bool reverse_subject_pos;

  gt_assert(sa && input);

  /* the cutoffs trim both ends of the displayed alignment (cosmetics only);
     the totals are start cutoff plus end cutoff */
  gen_cut_start = gth_sa_genomiccutoff_start(sa);
  gen_cut_total = gen_cut_start + gth_sa_genomiccutoff_end(sa);
  ref_cut_start = gth_sa_referencecutoff_start(sa);
  ref_cut_total = ref_cut_start + gth_sa_referencecutoff_end(sa);

  /* the reference file of this alignment has to be the currently loaded one
     before its sequence collection and alphabet are queried */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  seq_con = gth_input_current_ref_seq_con(input);
  alphabet = gth_input_current_ref_alphabet(input);

  /* for alignments on the reverse complement of the genomic DNA the display
     routines need this option to output correct genomic positions */
  reverse_subject_pos = !gth_sa_gen_strand_forward(sa);

  /* genomic sequence, advanced to the start of the DP range */
  genomic_seq = gth_input_original_genomic_sequence(input, sa->gen_file_num,
                                                    sa->gen_strand_forward);
  genomic_seq += gth_sa_gen_dp_start(sa);

  /* reference sequence in the orientation that was aligned */
  reference_seq = gth_sa_ref_strand_forward(sa)
    ? gth_seq_con_get_orig_seq(seq_con, gth_sa_ref_seq_num(sa))
    : gth_seq_con_get_orig_seq_rc(seq_con, gth_sa_ref_seq_num(sa));

  /* both display routines start behind the respective start cutoff */
  genomic_shown = genomic_seq + gen_cut_start;
  reference_shown = reference_seq + ref_cut_start;

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gthshowalignmentdna(outfp, ALIGNMENTLINEWIDTH,
                          gth_sa_get_editoperations(sa),
                          gth_sa_get_editoperations_length(sa),
                          gth_sa_indelcount(sa),
                          genomic_shown,
                          gth_sa_gen_dp_length(sa) - gen_cut_total,
                          reference_shown,
                          gth_sa_ref_total_length(sa) - ref_cut_total,
                          gth_sa_gen_dp_start(sa) + gen_cut_start -
                          gth_sa_gen_offset(sa),
                          ref_cut_start,
                          gth_sa_gen_total_length(sa),
                          showintronmaxlen,
                          alphabet,
                          reverse_subject_pos,
                          wildcardimplosion);
      break;
    case PROTEIN_ALPHA:
      gthshowalignmentprotein(outfp, ALIGNMENTLINEWIDTH,
                              gth_sa_get_editoperations(sa),
                              gth_sa_get_editoperations_length(sa),
                              gth_sa_indelcount(sa),
                              genomic_shown,
                              gth_sa_gen_dp_length(sa) - gen_cut_total,
                              reference_shown,
                              gth_sa_ref_total_length(sa) - ref_cut_total,
                              gth_sa_gen_dp_start(sa) + gen_cut_start -
                              gth_sa_gen_offset(sa),
                              ref_cut_start,
                              gth_sa_gen_total_length(sa),
                              showintronmaxlen,
                              alphabet,
                              translationtable,
                              gth_input_score_matrix(input),
                              gth_input_score_matrix_alpha(input),
                              reverse_subject_pos,
                              wildcardimplosion);
      break;
    default:
      /* no other alphabet type is expected here */
      gt_assert(0);
  }
}
Пример #2
0
/* Compute the alignment lines of the spliced alignment <sa>.
   For a DNA alignment <*first_line> and <*second_line> are filled and
   <*third_line> is set to NULL; for a protein alignment all three lines are
   filled. <translationtable> is used in the protein case only. The sequences
   are fetched via <input>, which has the reference file of <sa> loaded
   afterwards.
   Returns the value delivered by the respective fill routine (presumably the
   number of alignment columns -- TODO confirm against its declaration). For
   any other alphabet type the assertion fails; if assertions are compiled
   out, 0 is returned and the line pointers are left untouched. */
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                   unsigned char **first_line,
                                   unsigned char **second_line,
                                   unsigned char **third_line,
                                   GtUword translationtable,
                                   GthInput *input)
{
  GtUword genomicstartcutoff, genomictotalcutoff,
          referencestartcutoff, referencetotalcutoff,
          cols = 0;
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GthSeqCon *ref_seq_con;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* the cutoffs trim both ends of the alignment (cosmetic reasons only);
     the end cutoffs are needed for the totals only */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomictotalcutoff   = genomicstartcutoff + gth_sa_genomiccutoff_end(sa);
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referencetotalcutoff = referencestartcutoff +
                         gth_sa_referencecutoff_end(sa);

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* get genomic sequence, advanced to the start of the DP range */
  gen_seq_orig = (unsigned char*)
    gth_input_original_genomic_sequence(input, gth_sa_gen_file_num(sa),
                                        gth_sa_gen_strand_forward(sa))
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence in the orientation that was aligned */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig +
                                         genomicstartcutoff,
                                         gth_sa_gen_dp_length(sa) -
                                         genomictotalcutoff,
                                         ref_seq_orig +
                                         referencestartcutoff,
                                         gth_sa_ref_total_length(sa) -
                                         referencetotalcutoff,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      /* a DNA alignment has no third line */
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig +
                                           genomicstartcutoff,
                                           gth_sa_gen_dp_length(sa) -
                                           genomictotalcutoff,
                                           ref_seq_orig +
                                           referencestartcutoff,
                                           gth_sa_ref_total_length(sa) -
                                           referencetotalcutoff,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}
Пример #3
0
/* Output the 3-phase translation of the alternative gene structure <ags>
   followed by its ORFs to the file of <out>, as XML if <out>->xmlout is set
   and as plain text otherwise. <pglnum> and <agsnum> are the numbers of the
   PGL and the AGS; OUTPUTOFFSET is added to them for the header output.
   The exons of <ags> are spliced out of the genomic sequence obtained from
   <input> and translated with translation table <translationtable>.
   <indentlevel> is the indentation used for the XML output. */
void gt_outputtranslationandorf(unsigned long pglnum, const GthAGS *ags,
                                unsigned long agsnum,
                                unsigned long translationtable,
                                GthInput *input,
                                unsigned int indentlevel,
                                GthOutput *out)
{
  GtFile *outfp = out->outfp;
  GtStr *frame[3];
  GtArray *exon_ranges;
  GthSplicedSeq *spliced;
  GtCodonIterator *codon_iter;
  GtTransTable *table;
  GtTranslator *tr;
  GtTranslatorStatus rc;
  const unsigned char *genomic;
  unsigned long idx;
  unsigned int cur_frame, f;
  char aa;

  /* header */
  if (!out->xmlout) {
    gt_file_xprintf(outfp, "3-phase translation of AGS-%lu (%cstrand):\n\n",
                    agsnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_ags_is_forward(ags)));
  }
  else {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<three_phase_translation "
                    "xmlns=\"http://www.genomethreader.org/GTH_output/"
                    "PGL_module/predicted_gene_location/AGS_information/"
                    "three_phase_translation/\">\n");
    /* everything inside the element is indented one level deeper */
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<description PGL_serial=\"%lu\" "
                    "AGS_serial=\"%lu\" gDNA_strand=\"%c\"/>\n",
                    pglnum + OUTPUTOFFSET, agsnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_ags_is_forward(ags)));
  }

  /* collect the ranges of all exons */
  exon_ranges = gt_array_new(sizeof (GtRange));
  for (idx = 0; idx < gt_array_size(ags->exons); idx++) {
    GthExonAGS *exon = (GthExonAGS*) gt_array_get(ags->exons, idx);
    gt_array_add(exon_ranges, exon->range);
  }

  /* splice the exons out of the genomic sequence */
  genomic = gth_input_original_genomic_sequence(input, gth_ags_filenum(ags),
                                                gth_ags_is_forward(ags));
  spliced = gth_spliced_seq_new(genomic, exon_ranges);

  /* one string per reading frame */
  for (f = 0; f < 3; f++)
    frame[f] = gt_str_new();

  /* set up the translation of the spliced sequence */
  codon_iter = gt_codon_iterator_simple_new((const char*) spliced->splicedseq,
                                            spliced->splicedseqlen, NULL);
  gt_assert(codon_iter);
  table = gt_trans_table_new(translationtable, NULL);
  gt_assert(table);
  tr = gt_translator_new_with_table(table, codon_iter);

  /* every translated character is appended to the string of the frame
     reported for it, until the translator stops delivering characters */
  for (rc = gt_translator_next(tr, &aa, &cur_frame, NULL);
       rc == GT_TRANSLATOR_OK;
       rc = gt_translator_next(tr, &aa, &cur_frame, NULL)) {
    gt_str_append_char(frame[cur_frame], aa);
  }
  gt_assert(rc != GT_TRANSLATOR_ERROR);
  gt_translator_delete(tr);
  gt_trans_table_delete(table);
  gt_codon_iterator_delete(codon_iter);

  /* the translation itself */
  showtranslation(spliced,
                  gt_str_get(frame[0]),
                  gt_str_get(frame[1]),
                  gt_str_get(frame[2]),
                  ags->exons,
                  gth_ags_is_forward(ags),
                  gth_ags_total_length(ags),
                  gth_ags_genomic_offset(ags),
                  indentlevel, out);

  /* the (consolidated) ORFs */
  gthshowORFs(gt_str_get(frame[0]),
              gt_str_get(frame[1]),
              gt_str_get(frame[2]),
              gt_str_length(frame[0]),
              gt_str_length(frame[1]),
              gt_str_length(frame[2]),
              gth_ags_is_forward(ags),
              gth_ags_total_length(ags),
              gth_ags_genomic_offset(ags),
              gt_str_get(ags->gen_id),
              pglnum, agsnum, spliced,
              indentlevel, out);

  /* close the XML element opened by the header */
  if (out->xmlout) {
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</three_phase_translation>\n");
  }

  /* free everything allocated above */
  gth_spliced_seq_delete(spliced);
  gt_array_delete(exon_ranges);
  for (f = 0; f < 3; f++)
    gt_str_delete(frame[f]);
}