/*
  Sanity check used after clustering. For every PGL in <pgls> the first
  spliced alignment (SA) defines the genomic file number, the strand sign and
  the initial right border of the cluster. Every further SA of that PGL must
  have the same genomic file number and strand sign, and its forward range
  must not start to the right of the largest right border seen so far.

  Returns true if all PGLs fulfill these conditions (also for an empty array),
  false as soon as one SA violates them.
*/
static bool cluster_is_consistent(GtArray *pgls)
{
  GtUword pgl_idx, sa_idx,
          cluster_end = GT_UNDEF_UWORD,
          cluster_file_num = GT_UNDEF_UWORD;
  bool cluster_forward = GT_UNDEF_BOOL;

  for (pgl_idx = 0; pgl_idx < gt_array_size(pgls); pgl_idx++) {
    GthPGL *current_pgl = *(GthPGL**) gt_array_get(pgls, pgl_idx);

    for (sa_idx = 0; sa_idx < gt_array_size(current_pgl->alignments);
         sa_idx++) {
      GthSA *member = *(GthSA**) gt_array_get(current_pgl->alignments, sa_idx);
      GtRange span;

      if (sa_idx == 0) {
        /* the first SA of a PGL defines the reference values */
        cluster_file_num = gth_sa_gen_file_num(member);
        cluster_forward = gth_sa_gen_strand_forward(member);
        span = gth_sa_range_forward(member);
        cluster_end = span.end;
        continue;
      }

      /* genomic file number and strand sign must match the first SA */
      if (gth_sa_gen_file_num(member) != cluster_file_num ||
          gth_sa_gen_strand_forward(member) != cluster_forward) {
        return false;
      }

      /* cluster condition: the SA must not start behind the current border */
      span = gth_sa_range_forward(member);
      if (span.start > cluster_end)
        return false;
      if (span.end > cluster_end)
        cluster_end = span.end;
    }
  }

  return true;
}
/*
  Creates a new PGL for the spliced alignment <sa>, appends it to <pgls> and
  stores the index of the new PGL (the last element of <pgls>) in
  <*currentPGLindex>.

  The maximal range of the new PGL spans from the left genomic border of the
  first exon of <sa> to the right genomic border of its last exon.
*/
static void storeSAinnewPGL(GtArray *pgls, GtUword *currentPGLindex,
                            GthSA *sa)
{
  GthPGL *new_pgl = gth_pgl_new(gth_sa_gen_strand_forward(sa));

  /* the single SA determines the initial extent of the PGL */
  new_pgl->maxrange.start = gth_sa_get_exon(sa, 0)->leftgenomicexonborder;
  new_pgl->maxrange.end =
    gth_sa_get_exon(sa, gth_sa_num_of_exons(sa) - 1)->rightgenomicexonborder;

  gth_pgl_add_sa(new_pgl, sa);
  gt_array_add(pgls, new_pgl);

  /* the new PGL is the last element of the array */
  *currentPGLindex = gt_array_size(pgls) - 1;
}
/*
  Iterates over all spliced alignments (SAs) of <sa_collection> and hands each
  one to saveSAtoPGLs() together with <pgls>. Separate bookkeeping (genomic
  file number, maximal right position and current PGL index) is kept for SAs
  on the forward and on the reverse genomic strand; all six values start out
  undefined.

  After all SAs have been processed, the result is verified with
  cluster_is_consistent() (only in builds with assertions enabled).
*/
void gthclusterSAstoPGLs(GtArray *pgls, GthSACollection *sa_collection)
{
  /* state of the current forward cluster */
  GtUword fwd_file_num  = GT_UNDEF_UWORD,
          fwd_maxright  = GT_UNDEF_UWORD,
          fwd_pgl_index = GT_UNDEF_UWORD;
  /* state of the current reverse cluster */
  GtUword rev_file_num  = GT_UNDEF_UWORD,
          rev_maxright  = GT_UNDEF_UWORD,
          rev_pgl_index = GT_UNDEF_UWORD;
  GthSACollectionIterator *sa_iter;
  GthSA *current_sa;

  gt_assert(sa_collection);

  /* cluster the SAs, strand by strand */
  sa_iter = gth_sa_collection_iterator_new(sa_collection);
  for (current_sa = gth_sa_collection_iterator_next(sa_iter);
       current_sa != NULL;
       current_sa = gth_sa_collection_iterator_next(sa_iter)) {
    if (gth_sa_gen_strand_forward(current_sa)) {
      saveSAtoPGLs(&fwd_file_num, &fwd_maxright, &fwd_pgl_index, pgls,
                   current_sa);
    }
    else {
      saveSAtoPGLs(&rev_file_num, &rev_maxright, &rev_pgl_index, pgls,
                   current_sa);
    }
  }
  gth_sa_collection_iterator_delete(sa_iter);

  /* the resulting clusters have to be consistent */
  gt_assert(cluster_is_consistent(pgls));
}
/*
  Writes the spliced alignment <sa> as a <spliced_alignment> XML element to
  <outfp>. The opening and closing tags are indented by <indentlevel>, all
  child elements by <indentlevel> + 1 and the edit operations by
  <indentlevel> + 2. <input> is passed on to the helpers which show the
  genomic and the reference file name.
*/
static void xml_inter_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  const unsigned int outer = indentlevel,      /* <spliced_alignment> tags */
                     inner = indentlevel + 1;  /* child elements */
  bool reference_is_dna = true;

  gth_indent(outfp, outer);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.GenomeThreader.org/"
                  "SplicedAlignment/spliced_alignment/\">\n");

  /* alphabet type of the reference sequence */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referencealphatype>");
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "DNA_ALPHA");
      break;
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "PROTEIN_ALPHA");
      reference_is_dna = false;
      break;
    default: gt_assert(0);
  }
  gt_file_xprintf(outfp, "</referencealphatype>\n");

  /* edit operations, nested one level deeper than the other children */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<editoperations>\n");
  gth_backtrace_path_show_complete(gth_sa_backtrace_path(sa), true, inner + 1,
                                   outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "</editoperations>\n");

  /* lengths, offsets and DP positions */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<indelcount>"GT_WU"</indelcount>\n",
                  gth_sa_indelcount(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomiclengthDP>"GT_WU"</genomiclengthDP>\n",
                  gth_sa_gen_dp_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomiclengthtotal>"GT_WU"</genomiclengthtotal>\n",
                  gth_sa_gen_total_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicoffset>"GT_WU"</genomicoffset>\n",
                  gth_sa_gen_offset(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referencelength>"GT_WU"</referencelength>\n",
                  gth_sa_ref_total_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<dpstartpos>"GT_WU"</dpstartpos>\n",
                  gth_sa_gen_dp_start(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<dpendpos>"GT_WU"</dpendpos>\n",
                  gth_sa_gen_dp_end(sa));

  /* file names, sequence numbers and sequence ids */
  showgenomicfilename(sa, input, inner, outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicseqnum>"GT_WU"</genomicseqnum>\n",
                  gth_sa_gen_seq_num(sa));
  showreferencefilename(sa, input, inner, outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referenceseqnum>"GT_WU"</referenceseqnum>\n",
                  gth_sa_ref_seq_num(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicid>%s</genomicid>\n", gth_sa_gen_id(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referenceid>%s</referenceid>\n",
                  gth_sa_ref_id(sa));

  /* strand orientation */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<genomicstrandisforward>%s</genomicstrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_gen_strand_forward(sa)));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<referencestrandisforward>%s</referencestrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_ref_strand_forward(sa)));

  /* structured parts, each written by its own helper */
  showalignmentcutoffs(sa, inner, outfp);
  showexons(sa, inner, outfp);
  showintrons(sa, reference_is_dna, inner, outfp);
  showpolyAtailpos(sa, inner, outfp);

  /* scores and coverage */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<alignmentscore>%.*f</alignmentscore>\n", PRECISION,
                  gth_sa_score(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<coverage>%.*f</coverage>\n", PRECISION,
                  gth_sa_coverage(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<coverageofgenomicsegmentishighest>%s"
                  "</coverageofgenomicsegmentishighest>\n",
                  GTH_SHOWBOOL(gth_sa_genomic_cov_is_highest(sa)));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<cumulativelengthofscoredexons>"GT_WU""
                  "</cumulativelengthofscoredexons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  gth_indent(outfp, outer);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");
}
/*
  Writes the final XML representation of the spliced alignment <sa> to
  <outfp>: reference information including the reference sequence, genomic
  information, the alignment header and the alignment lines themselves.

  <minintronlength> is passed on to xml_showalignmentheader(),
  <translationtable> to gth_sa_get_alignment_lines(). <indentlevel> is the
  indentation of the enclosing <spliced_alignment> element.

  NOTE(review): the closing tags </reference> and </predicted_gene_structure>
  written here have no opening counterpart in this function; presumably they
  are opened by xml_showgthreferenceinformation() and
  xml_showalignmentheader(), respectively -- confirm against those helpers.
*/
static void xml_final_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned long minintronlength,
                                             unsigned long translationtable,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  /* Initialized to NULL, because all three pointers are freed unconditionally
     at the end of this function, even if gth_sa_get_alignment_lines() does
     not store anything in them (unknown alphabet type in a build without
     assertions). */
  unsigned char *first_line = NULL,
                *second_line = NULL,
                *third_line = NULL;
  unsigned long cols;

  gt_assert(sa && input);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.genomethreader.org/"
                  "GTH_output/alignment_module/spliced_alignment/\">\n");
  indentlevel++;

  /* reference information, including the reference sequence itself */
  xml_showgthreferenceinformation(sa, input, indentlevel, outfp);
  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<seq>");
  gth_sa_echo_reference_sequence(sa, input, false, outfp);
  gt_file_xprintf(outfp, "</seq>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</reference>\n");

  xml_showgthgenomicinformation(sa, input, indentlevel, outfp);

  xml_showalignmentheader(sa, minintronlength, indentlevel, outfp);
  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<alignment>\n");

  /* compute the alignment lines */
  cols = gth_sa_get_alignment_lines(sa, &first_line, &second_line,
                                    &third_line, translationtable, input);

  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genome_strand>");
  showconcreteline(first_line, cols, outfp);
  gt_file_xprintf(outfp, "</genome_strand>\n");

  /* this indentation belongs to the first element written in the switch */
  gth_indent(outfp, indentlevel);
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      gt_file_xprintf(outfp, "<mrna_strand>");
      showconcreteline(second_line, cols, outfp);
      gt_file_xprintf(outfp, "</mrna_strand>\n");
      break;
    case PROTEIN_ALPHA:
      gt_file_xprintf(outfp, "<genomeProt>");
      showconcreteline(second_line, cols, outfp);
      gt_file_xprintf(outfp, "</genomeProt>\n");
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "<queryProt>");
      showconcreteline(third_line, cols, outfp);
      gt_file_xprintf(outfp, "</queryProt>\n");
      break;
    default: gt_assert(0);
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</alignment>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</predicted_gene_structure>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");

  /* free (third_line is NULL for DNA alignments) */
  gt_free(first_line);
  gt_free(second_line);
  gt_free(third_line);
}
/*
  Computes the alignment lines of the spliced alignment <sa> and returns the
  number of alignment columns.

  For a DNA reference two lines are computed and stored in <*first_line> and
  <*second_line>; <*third_line> is set to NULL. For a protein reference three
  lines are computed and <translationtable> is passed on to the line
  computation. For any other alphabet type an assertion fails; in builds
  without assertions all three output pointers are set to NULL and 0 is
  returned, so that callers can free the lines unconditionally.

  The reference file belonging to <sa> is loaded via <input> before the
  sequences are accessed. The genomic and reference cutoffs of <sa> are
  excluded from the aligned sequence parts.
*/
GtUword gth_sa_get_alignment_lines(const GthSA *sa,
                                   unsigned char **first_line,
                                   unsigned char **second_line,
                                   unsigned char **third_line,
                                   GtUword translationtable,
                                   GthInput *input)
{
  GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff,
          referencestartcutoff, referenceendcutoff, referencetotalcutoff,
          cols = 0;
  unsigned char *gen_seq_orig, *ref_seq_orig;
  GthSeqCon *ref_seq_con;

  gt_assert(sa && first_line && second_line && third_line && input);

  /* only for cosmetic reasons */
  genomicstartcutoff   = gth_sa_genomiccutoff_start(sa);
  genomicendcutoff     = gth_sa_genomiccutoff_end(sa);
  genomictotalcutoff   = genomicstartcutoff + genomicendcutoff;
  referencestartcutoff = gth_sa_referencecutoff_start(sa);
  referenceendcutoff   = gth_sa_referencecutoff_end(sa);
  referencetotalcutoff = referencestartcutoff + referenceendcutoff;

  /* make sure that the correct files are loaded */
  gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false);
  ref_seq_con = gth_input_current_ref_seq_con(input);

  /* get genomic sequence, starting at the DP start position */
  gen_seq_orig = (unsigned char*)
                 gth_input_original_genomic_sequence(input,
                                                gth_sa_gen_file_num(sa),
                                                gth_sa_gen_strand_forward(sa))
                 + gth_sa_gen_dp_start(sa);

  /* get reference sequence (reverse complement for the reverse strand) */
  if (gth_sa_ref_strand_forward(sa)) {
    ref_seq_orig = gth_seq_con_get_orig_seq(ref_seq_con,
                                            gth_sa_ref_seq_num(sa));
  }
  else {
    ref_seq_orig = gth_seq_con_get_orig_seq_rc(ref_seq_con,
                                               gth_sa_ref_seq_num(sa));
  }

  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* compute the two alignment lines */
      cols = gthfillthetwoalignmentlines(first_line,
                                         second_line,
                                         gen_seq_orig + genomicstartcutoff,
                                         gth_sa_gen_dp_length(sa)
                                         - genomictotalcutoff,
                                         ref_seq_orig + referencestartcutoff,
                                         gth_sa_ref_total_length(sa)
                                         - referencetotalcutoff,
                                         gth_sa_get_editoperations(sa),
                                         gth_sa_get_editoperations_length(sa),
                                         0,   /* linewidth not important here */
                                         0,   /* no short introns here */
                                         NULL,/* therefore no shortintroninfo */
                                         gth_sa_indelcount(sa));
      *third_line = NULL;
      break;
    case PROTEIN_ALPHA:
      /* compute the three alignment lines */
      cols = gthfillthethreealignmentlines(first_line,
                                           second_line,
                                           third_line,
                                           gth_sa_get_editoperations(sa),
                                           gth_sa_get_editoperations_length(sa),
                                           gth_sa_indelcount(sa),
                                           gen_seq_orig + genomicstartcutoff,
                                           gth_sa_gen_dp_length(sa)
                                           - genomictotalcutoff,
                                           ref_seq_orig + referencestartcutoff,
                                           gth_sa_ref_total_length(sa)
                                           - referencetotalcutoff,
                                           translationtable);
      break;
    default:
      gt_assert(0);
      /* Only reached if assertions are disabled: leave the output pointers in
         a defined state, because callers free them unconditionally. */
      *first_line = NULL;
      *second_line = NULL;
      *third_line = NULL;
      break;
  }

  return cols;
}
void gth_sa_echo_alignment(const GthSA *sa, GtUword showintronmaxlen, GtUword translationtable, bool wildcardimplosion, GthInput *input, GtFile *outfp) { GtUword genomicstartcutoff, genomicendcutoff, genomictotalcutoff, referencestartcutoff, referenceendcutoff, referencetotalcutoff; bool reverse_subject_pos = false; const unsigned char *gen_seq_orig, *ref_seq_orig; GthSeqCon *ref_seq_con; GtAlphabet *ref_alphabet; gt_assert(sa && input); /* only for cosmetic reasons */ genomicstartcutoff = gth_sa_genomiccutoff_start(sa); genomicendcutoff = gth_sa_genomiccutoff_end(sa); genomictotalcutoff = genomicstartcutoff + genomicendcutoff; referencestartcutoff = gth_sa_referencecutoff_start(sa); referenceendcutoff = gth_sa_referencecutoff_end(sa); referencetotalcutoff = referencestartcutoff + referenceendcutoff; /* make sure that the correct files are loaded */ gth_input_load_reference_file(input, gth_sa_ref_file_num(sa), false); ref_seq_con = gth_input_current_ref_seq_con(input); ref_alphabet = gth_input_current_ref_alphabet(input); /* If the reverse complement of the genomic DNA is considered, this opition is needed for correct output of the genomic sequence positions by the function showalignmentgeneric() */ if (!gth_sa_gen_strand_forward(sa)) reverse_subject_pos = true; /* get genomic sequence */ gen_seq_orig = gth_input_original_genomic_sequence(input, sa->gen_file_num, sa->gen_strand_forward) + gth_sa_gen_dp_start(sa); /* get reference sequence */ if (gth_sa_ref_strand_forward(sa)) { ref_seq_orig = gth_seq_con_get_orig_seq(ref_seq_con, gth_sa_ref_seq_num(sa)); } else { ref_seq_orig = gth_seq_con_get_orig_seq_rc(ref_seq_con, gth_sa_ref_seq_num(sa)); } switch (gth_sa_alphatype(sa)) { case DNA_ALPHA: gthshowalignmentdna(outfp,ALIGNMENTLINEWIDTH, gth_sa_get_editoperations(sa), gth_sa_get_editoperations_length(sa), gth_sa_indelcount(sa), gen_seq_orig + genomicstartcutoff, gth_sa_gen_dp_length(sa) - genomictotalcutoff, ref_seq_orig + referencestartcutoff, 
gth_sa_ref_total_length(sa) - referencetotalcutoff, gth_sa_gen_dp_start(sa) + genomicstartcutoff - gth_sa_gen_offset(sa), referencestartcutoff, gth_sa_gen_total_length(sa), showintronmaxlen, ref_alphabet, reverse_subject_pos, wildcardimplosion); break; case PROTEIN_ALPHA: gthshowalignmentprotein(outfp, ALIGNMENTLINEWIDTH, gth_sa_get_editoperations(sa), gth_sa_get_editoperations_length(sa), gth_sa_indelcount(sa), gen_seq_orig + genomicstartcutoff, gth_sa_gen_dp_length(sa) - genomictotalcutoff, ref_seq_orig + referencestartcutoff, gth_sa_ref_total_length(sa) - referencetotalcutoff, gth_sa_gen_dp_start(sa) + genomicstartcutoff - gth_sa_gen_offset(sa), referencestartcutoff, gth_sa_gen_total_length(sa), showintronmaxlen, ref_alphabet, translationtable, gth_input_score_matrix(input), gth_input_score_matrix_alpha(input), reverse_subject_pos, wildcardimplosion); break; default: gt_assert(0); } }