/* Write the genomic template of spliced alignment <sa> to <outfp> as an XML
   <gDNA_segment> element whose opening tag is indented by <indentlevel>.
   The element holds a single <template> (file name, sequence id, strand and
   description as attributes) which in turn wraps a <position> element with
   the values of gth_sa_gen_dp_start_show() and gth_sa_gen_dp_end_show().
   NOTE(review): the description is written into the attribute value by
   gth_input_echo_genomic_description(); whether that text is XML-escaped
   cannot be seen from here -- confirm. */
static void xml_showgthgenomicinformation(GthSA *sa, GthInput *input,
                                          unsigned int indentlevel,
                                          GtFile *outfp)
{
  /* nesting depths of the three element levels written below */
  const unsigned int segment_depth = indentlevel,
                     template_depth = indentlevel + 1,
                     position_depth = indentlevel + 2;

  gt_assert(gth_sa_gen_file_num(sa) != GT_UNDEF_ULONG);

  /* <gDNA_segment> */
  gth_indent(outfp, segment_depth);
  gt_file_xprintf(outfp, "<gDNA_segment>\n");

  /* <template ...>: the description attribute is streamed separately, hence
     the opening tag is emitted in three pieces */
  gth_indent(outfp, template_depth);
  gt_file_xprintf(outfp, "<template temp_file=\"%s\" temp_id=\"%s\" "
                  "temp_strand=\"%c\" temp_description=\"",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)),
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));
  gth_input_echo_genomic_description(input,
                                     gth_sa_gen_file_num(sa),
                                     gth_sa_gen_seq_num(sa),
                                     outfp);
  gt_file_xprintf(outfp, "\">\n");

  /* <position .../> */
  gth_indent(outfp, position_depth);
  gt_file_xprintf(outfp, "<position start=\"%lu\" stop=\"%lu\"/>\n",
                  gth_sa_gen_dp_start_show(sa),
                  gth_sa_gen_dp_end_show(sa));

  /* </template> */
  gth_indent(outfp, template_depth);
  gt_file_xprintf(outfp, "</template>\n");

  /* </gDNA_segment> */
  gth_indent(outfp, segment_depth);
  gt_file_xprintf(outfp, "</gDNA_segment>\n");
}
/* Check the cluster invariants for every PGL stored in <pgls>.
   Within one PGL, the first alignment defines the genomic file number, the
   strand and the initial right border. Each further alignment must have the
   same file number and strand, and its forward range must not start to the
   right of the largest range end seen so far in that PGL.
   Returns false as soon as one alignment violates these conditions and true
   otherwise (including when <pgls> is empty). */
static bool cluster_is_consistent(GtArray *pgls)
{
  GtUword pgl_idx, sa_idx,
          rightmost = GT_UNDEF_UWORD,
          cluster_file_num = GT_UNDEF_UWORD;
  bool cluster_forward = GT_UNDEF_BOOL;

  for (pgl_idx = 0; pgl_idx < gt_array_size(pgls); pgl_idx++) {
    GthPGL *cur_pgl = *(GthPGL**) gt_array_get(pgls, pgl_idx);

    for (sa_idx = 0; sa_idx < gt_array_size(cur_pgl->alignments); sa_idx++) {
      GthSA *cur_sa = *(GthSA**) gt_array_get(cur_pgl->alignments, sa_idx);
      GtRange sa_range;

      if (sa_idx == 0) {
        /* the first alignment sets the reference values of this cluster */
        cluster_file_num = gth_sa_gen_file_num(cur_sa);
        cluster_forward = gth_sa_gen_strand_forward(cur_sa);
        sa_range = gth_sa_range_forward(cur_sa);
        rightmost = sa_range.end;
        continue;
      }

      /* file number and strand have to agree with the first alignment */
      if (gth_sa_gen_file_num(cur_sa) != cluster_file_num ||
          gth_sa_gen_strand_forward(cur_sa) != cluster_forward) {
        return false;
      }

      /* cluster condition: no gap to the right of everything seen so far */
      sa_range = gth_sa_range_forward(cur_sa);
      if (sa_range.start > rightmost)
        return false;

      /* extend the right border if this alignment reaches further */
      if (sa_range.end > rightmost)
        rightmost = sa_range.end;
    }
  }

  return true;
}
/* Hand <sa> either to storeSAincurrentPGL() or to storeSAinnewPGL() and keep
   the bookkeeping values behind <gen_file_num> and <maxright> up to date.
   storeSAinnewPGL() is used if no genomic file number has been recorded yet
   (*gen_file_num is GT_UNDEF_UWORD), if the file number of <sa> differs from
   the recorded one, or if the forward range of <sa> starts to the right of
   *maxright. In that case *gen_file_num and *maxright are reset from <sa>.
   Otherwise <sa> goes to the PGL at *currentPGLindex and *maxright is only
   enlarged if the range of <sa> ends further right. */
static void saveSAtoPGLs(GtUword *gen_file_num, GtUword *maxright,
                         GtUword *currentPGLindex, GtArray *pgls, GthSA *sa)
{
  GtRange sa_range = gth_sa_range_forward(sa);

  if (*gen_file_num != GT_UNDEF_UWORD &&
      gth_sa_gen_file_num(sa) == *gen_file_num &&
      sa_range.start <= *maxright) {
    /* same file and no gap: <sa> belongs to the current PGL */
    storeSAincurrentPGL(pgls, *currentPGLindex, sa);
    if (sa_range.end > *maxright)
      *maxright = sa_range.end;
  }
  else {
    /* first alignment, other genomic file, or gap: start a new PGL */
    storeSAinnewPGL(pgls, currentPGLindex, sa);
    *gen_file_num = gth_sa_gen_file_num(sa);
    *maxright = sa_range.end;
  }
}
/* Print a one-line "Genomic Template" summary of <sa> to <outfp>: genomic
   file name, strand character, the values of gth_sa_gen_dp_start_show() and
   gth_sa_gen_dp_end_show(), and the genomic description. If <showseqnums> is
   set, the genomic sequence number is appended. The line is terminated by two
   newline characters. */
static void showgthgenomicinformation(GthSA *sa, GthInput *input,
                                      bool showseqnums, GtFile *outfp)
{
  const char *genomic_filename;
  GtUword shown_from, shown_to;

  gt_assert(gth_sa_gen_file_num(sa) != GT_UNDEF_UWORD);

  genomic_filename = gth_input_get_genomic_filename(input,
                                                    gth_sa_gen_file_num(sa));
  shown_from = gth_sa_gen_dp_start_show(sa);
  shown_to = gth_sa_gen_dp_end_show(sa);

  /* fixed part of the line, up to and including "description=" */
  gt_file_xprintf(outfp, "Genomic Template: file=%s, strand=%c, from="GT_WU", "
                  "to="GT_WU", description=",
                  genomic_filename,
                  gth_sa_gen_strand_char(sa),
                  shown_from,
                  shown_to);

  /* the description itself is written by a dedicated helper */
  gth_sa_echo_genomic_description(sa, input, outfp);

  if (showseqnums) {
    gt_file_xprintf(outfp, ", seqnum="GT_WU"", gth_sa_gen_seq_num(sa));
  }

  /* end of line plus one empty line */
  gt_file_xfputc('\n', outfp);
  gt_file_xfputc('\n', outfp);
}
/* Write a <genomicfile> XML element for the genomic file of <sa> to <outfp>,
   with the opening and closing tag indented by <indentlevel> and the two
   child elements one level deeper. The children are <genomicfilename> (name
   of the genomic file) and <genomicfilehash>, which always receives
   GTH_UNDEFINED_HASH. */
static void showgenomicfilename(GthSA *sa, GthInput *input,
                                unsigned int indentlevel, GtFile *outfp)
{
  const unsigned int child_depth = indentlevel + 1;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicfile>\n");

  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp, "<genomicfilename>%s</genomicfilename>\n",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)));

  gth_indent(outfp, child_depth);
  gt_file_xprintf(outfp, "<genomicfilehash>%s</genomicfilehash>\n",
                  GTH_UNDEFINED_HASH);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</genomicfile>\n");
}
/* Compute the alignment lines of spliced alignment <sa>.

   The reference file of <sa> is loaded via <input>; the genomic sequence is
   taken from gth_input_original_genomic_sequence() for the strand of <sa>,
   offset by gth_sa_gen_dp_start(). On both sequences the start and end
   cutoffs stored in <sa> are skipped before the lines are filled.

   For DNA_ALPHA alignments <first_line> and <second_line> are filled by
   gthfillthetwoalignmentlines() and *third_line is set to NULL.
   For PROTEIN_ALPHA alignments all three lines are filled by
   gthfillthethreealignmentlines() using <translationtable>.

   Returns the value delivered by the respective fill function. Any other
   alphabet type triggers gt_assert(0); if assertions are disabled, 0 is
   returned and the line pointers are left untouched.
   NOTE(review): ownership of the returned lines is determined by the fill
   functions and is not visible from here -- confirm before freeing. */
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                   unsigned char **first_line,
                                   unsigned char **second_line,
                                   unsigned char **third_line,
                                   GtUword translationtable,
                                   GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
          referencestartcutoff, referenceendcutoff, referencetotalcutoff,
          gen_length, ref_length,
          cols = 0;
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GthSeqCon *ref_seq_con;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* cutoffs: only for cosmetic reasons */
  genomicstartcutoff = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct reference file is loaded before its sequence
     container is queried */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* get genomic sequence (of the strand of <sa>), starting at the DP start */
  gen_seq_orig = (unsigned char*)
                 gth_input_original_genomic_sequence(input,
                                                 gth_sa_gen_file_num(sa),
                                                 gth_sa_gen_strand_forward(sa))
                 + gth_sa_gen_dp_start(sa);

  /* get reference sequence (reverse complement for the reverse strand) */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig = gth_seq_con_get_orig_seq(ref_seq_con,
                                            gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig = gth_seq_con_get_orig_seq_rc(ref_seq_con,
                                               gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* sequence lengths without the cut off parts */
      gen_length = gth_sa_gen_dp_length(sa) - genomictotalcutoff;
      ref_length = gth_sa_ref_total_length(sa) - referencetotalcutoff;
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig + genomicstartcutoff,
                                         gen_length,
                                         ref_seq_orig + referencestartcutoff,
                                         ref_length,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* sequence lengths without the cut off parts */
      gen_length = gth_sa_gen_dp_length(sa) - genomictotalcutoff;
      ref_length = gth_sa_ref_total_length(sa) - referencetotalcutoff;
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig + genomicstartcutoff,
                                           gen_length,
                                           ref_seq_orig + referencestartcutoff,
                                           ref_length,
                                           translationtable);
      break;
    default: gt_assert(0);
  }

  return cols;
}