Esempio n. 1
0
/*
 * Write a <gDNA_segment> XML element for the spliced alignment <sa> to
 * <outfp>, with the outermost tag indented by <indentlevel>.
 *
 * The element contains one <template> element (attributes: temp_file,
 * temp_id, temp_strand, temp_description) which in turn encloses a
 * <position> element carrying the values of gth_sa_gen_dp_start_show() and
 * gth_sa_gen_dp_end_show().
 *
 * The genomic file number of <sa> must be defined (asserted).
 */
static void xml_showgthgenomicinformation(GthSA *sa,
                                          GthInput *input,
                                          unsigned int indentlevel,
                                          GtFile *outfp)
{
  /* indentation of the nested elements, relative to <gDNA_segment> */
  const unsigned int template_level = indentlevel + 1,
                     position_level = template_level + 1;

  gt_assert(gth_sa_gen_file_num(sa) != GT_UNDEF_ULONG);

  /* <gDNA_segment> */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA_segment>\n");

  /* <template ...>: the temp_description attribute value is left open by the
     format string; its content comes from the echo call that follows */
  gth_indent(outfp, template_level);
  gt_file_xprintf(outfp,
                  "<template temp_file=\"%s\" temp_id=\"%s\" "
                  "temp_strand=\"%c\" temp_description=\"",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)),
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));
  /* NOTE(review): the description lands inside an attribute value; whether it
     is XML-escaped depends on the echo helper -- confirm */
  gth_input_echo_genomic_description(input, gth_sa_gen_file_num(sa),
                                     gth_sa_gen_seq_num(sa), outfp);
  /* terminate the attribute value and the start tag */
  gt_file_xprintf(outfp, "\">\n");

  /* <position .../> */
  gth_indent(outfp, position_level);
  gt_file_xprintf(outfp, "<position start=\"%lu\" stop=\"%lu\"/>\n",
                  gth_sa_gen_dp_start_show(sa),
                  gth_sa_gen_dp_end_show(sa));

  /* </template> */
  gth_indent(outfp, template_level);
  gt_file_xprintf(outfp, "</template>\n");

  /* </gDNA_segment> */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</gDNA_segment>\n");
}
Esempio n. 2
0
/*
 * Check the PGLs stored in <pgls> (an array of GthPGL pointers).
 *
 * Returns false as soon as, within one PGL, an alignment
 *  - has a different genomic file number than the PGL's first alignment, or
 *  - has a different strand orientation than the PGL's first alignment, or
 *  - has a forward range starting beyond the largest range end seen so far
 *    in that PGL.
 * Returns true otherwise (in particular for an empty array or for PGLs
 * without alignments).
 */
static bool cluster_is_consistent(GtArray *pgls)
{
  GtUword pgl_idx, sa_idx, rightmost = GT_UNDEF_UWORD,
          file_num = GT_UNDEF_UWORD;
  bool forward = GT_UNDEF_BOOL;
  GthPGL *cur_pgl;
  GthSA *cur_sa;
  GtRange sa_range;

  for (pgl_idx = 0; pgl_idx < gt_array_size(pgls); pgl_idx++) {
    cur_pgl = *(GthPGL**) gt_array_get(pgls, pgl_idx);

    for (sa_idx = 0; sa_idx < gt_array_size(cur_pgl->alignments); sa_idx++) {
      cur_sa = *(GthSA**) gt_array_get(cur_pgl->alignments, sa_idx);

      if (sa_idx == 0) {
        /* the first alignment defines the reference values of this PGL:
           genomic file number, strand and initial right border */
        file_num = gth_sa_gen_file_num(cur_sa);
        forward = gth_sa_gen_strand_forward(cur_sa);
        sa_range = gth_sa_range_forward(cur_sa);
        rightmost = sa_range.end;
        continue;
      }

      /* file number and strand must agree with the first alignment */
      if (gth_sa_gen_file_num(cur_sa) != file_num ||
          gth_sa_gen_strand_forward(cur_sa) != forward)
        return false;

      /* cluster condition: no alignment may start right of the current
         right border; extend the border if this alignment reaches further */
      sa_range = gth_sa_range_forward(cur_sa);
      if (sa_range.start > rightmost)
        return false;
      if (rightmost < sa_range.end)
        rightmost = sa_range.end;
    }
  }

  return true;
}
Esempio n. 3
0
/*
 * Store the spliced alignment <sa> either in the current PGL or in a new one.
 *
 * <sa> goes into the current PGL (index *currentPGLindex) if *gen_file_num is
 * defined, equals the genomic file number of <sa>, and the forward range of
 * <sa> does not start beyond *maxright; *maxright is then raised to the range
 * end of <sa> if that is larger.
 *
 * Otherwise <sa> is stored via storeSAinnewPGL() (which receives the
 * <currentPGLindex> pointer), and *gen_file_num and *maxright are reset to
 * the file number and range end of <sa>.
 */
static void saveSAtoPGLs(GtUword *gen_file_num, GtUword *maxright,
                         GtUword *currentPGLindex, GtArray *pgls,
                         GthSA *sa)
{
  GtRange sa_range = gth_sa_range_forward(sa);

  if (*gen_file_num != GT_UNDEF_UWORD &&
      gth_sa_gen_file_num(sa) == *gen_file_num &&
      sa_range.start <= *maxright) {
    /* same genomic file and within the current right border:
       the alignment belongs to the current PGL */
    storeSAincurrentPGL(pgls, *currentPGLindex, sa);
    if (*maxright < sa_range.end)
      *maxright = sa_range.end;
    return;
  }

  /* first alignment, different genomic file, or start beyond the right
     border: open a new PGL and restart the bookkeeping from this alignment */
  storeSAinnewPGL(pgls, currentPGLindex, sa);
  *gen_file_num = gth_sa_gen_file_num(sa);
  *maxright = sa_range.end;
}
/*
 * Print a one-line plain-text summary of the genomic template of <sa> to
 * <outfp>: file name, strand character, from/to values (as given by
 * gth_sa_gen_dp_start_show() / gth_sa_gen_dp_end_show()) and the description.
 * If <showseqnums> is set, ", seqnum=<n>" is appended. The line is followed
 * by one additional newline character.
 *
 * The genomic file number of <sa> must be defined (asserted).
 */
static void showgthgenomicinformation(GthSA *sa, GthInput *input,
                                      bool showseqnums, GtFile *outfp)
{
  unsigned int newline_count;

  gt_assert(gth_sa_gen_file_num(sa) != GT_UNDEF_UWORD);

  /* everything up to and including the "description=" label */
  gt_file_xprintf(outfp,
                  "Genomic Template: file=%s, strand=%c, from="GT_WU", "
                  "to="GT_WU", description=",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)),
                  gth_sa_gen_strand_char(sa),
                  gth_sa_gen_dp_start_show(sa),
                  gth_sa_gen_dp_end_show(sa));

  /* the description text itself is emitted by the echo helper */
  gth_sa_echo_genomic_description(sa, input, outfp);

  if (showseqnums) {
    gt_file_xprintf(outfp, ", seqnum="GT_WU"", gth_sa_gen_seq_num(sa));
  }

  /* two newline characters: end of the line plus one more */
  for (newline_count = 0; newline_count < 2; newline_count++)
    gt_file_xfputc('\n', outfp);
}
/*
 * Write a <genomicfile> XML element for the genomic file of <sa> to <outfp>,
 * with the outer tags indented by <indentlevel>. The element contains
 * <genomicfilename> (name of the genomic file with the file number of <sa>)
 * and <genomicfilehash> (always GTH_UNDEFINED_HASH).
 */
static void showgenomicfilename(GthSA *sa, GthInput *input,
                                unsigned int indentlevel, GtFile *outfp)
{
  /* indentation of the two child elements */
  const unsigned int child_level = indentlevel + 1;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicfile>\n");

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp,
                  "<genomicfilename>%s</genomicfilename>\n",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp,
                  "<genomicfilehash>%s</genomicfilehash>\n",
                  GTH_UNDEFINED_HASH);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</genomicfile>\n");
}
Esempio n. 6
0
/*
 * Fill the alignment lines for the spliced alignment <sa>.
 *
 * The reference file of <sa> is loaded into <input>, then the genomic and
 * reference sequences are fetched (each on the strand recorded in <sa>) and
 * trimmed by the genomic/reference cutoffs stored in <sa>.
 *
 * - DNA_ALPHA:     <first_line> and <second_line> are filled via
 *                  gthfillthetwoalignmentlines(); *third_line is set to NULL.
 * - PROTEIN_ALPHA: all three lines are filled via
 *                  gthfillthethreealignmentlines(), which also receives
 *                  <translationtable>.
 * Any other alphabet type triggers gt_assert(0).
 *
 * All pointer arguments must be non-NULL (asserted).
 *
 * Returns the value returned by the respective fill function (presumably the
 * number of alignment columns -- confirm against those functions), or 0 if
 * no fill function was called.
 */
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                   unsigned char **first_line,
                                   unsigned char **second_line,
                                   unsigned char **third_line,
                                   GtUword translationtable,
                                   GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
          referencestartcutoff, referenceendcutoff, referencetotalcutoff,
          gen_len, ref_len, cols = 0;
  unsigned char *gen_seq_orig, *ref_seq_orig, *gen_seq, *ref_seq;
  GthSeqCon *ref_seq_con;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* get genomic sequence (on the strand of the alignment), positioned at the
     DP start */
  gen_seq_orig = (unsigned char*)
    gth_input_original_genomic_sequence(input, gth_sa_gen_file_num(sa),
                                        gth_sa_gen_strand_forward(sa))
    + gth_sa_gen_dp_start(sa);

  /* get reference sequence (reverse complement for the reverse strand) */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig =
      gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig =
      gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa));
  }

  /* apply the cutoffs once; both fill functions take the same trimmed
     sequences and lengths */
  gen_seq = gen_seq_orig + genomicstartcutoff;
  gen_len = gth_sa_gen_dp_length(sa) - genomictotalcutoff;
  ref_seq = ref_seq_orig + referencestartcutoff;
  ref_len = gth_sa_ref_total_length(sa) - referencetotalcutoff;

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq,
                                         gen_len,
                                         ref_seq,
                                         ref_len,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq,
                                           gen_len,
                                           ref_seq,
                                           ref_len,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}