/* Writes the <gDNA_segment> element describing the genomic template of <sa>
   to <outfp>, starting at <indentlevel>. The element wraps a <template>
   element (file name, sequence id, strand, description) which in turn holds
   a single <position> element with the values of gth_sa_gen_dp_start_show()
   and gth_sa_gen_dp_end_show(). */
static void xml_showgthgenomicinformation(GthSA *sa, GthInput *input,
                                          unsigned int indentlevel,
                                          GtFile *outfp)
{
  const unsigned int template_level = indentlevel + 1,
                     position_level = indentlevel + 2;

  gt_assert(gth_sa_gen_file_num(sa) != GT_UNDEF_ULONG);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA_segment>\n");

  /* the opening <template> tag is assembled in three steps, because the value
     of the description attribute is echoed by the input object */
  gth_indent(outfp, template_level);
  gt_file_xprintf(outfp,
                  "<template temp_file=\"%s\" "
                  "temp_id=\"%s\" temp_strand=\"%c\" temp_description=\"",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)),
                  gth_sa_gen_id(sa),
                  gth_sa_gen_strand_char(sa));
  gth_input_echo_genomic_description(input, gth_sa_gen_file_num(sa),
                                     gth_sa_gen_seq_num(sa), outfp);
  gt_file_xprintf(outfp, "\">\n");

  gth_indent(outfp, position_level);
  gt_file_xprintf(outfp, "<position start=\"%lu\" stop=\"%lu\"/>\n",
                  gth_sa_gen_dp_start_show(sa), gth_sa_gen_dp_end_show(sa));

  gth_indent(outfp, template_level);
  gt_file_xprintf(outfp, "</template>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</gDNA_segment>\n");
}
/* Writes the <AGS_line> element of <ags> to <outfp>. The serial number is
   <agsnum> + OUTPUTOFFSET; the nested <exon_coordinates> element lists one
   <exon> per AGS exon with its start and end passed through
   SHOWGENPOSAGS(). */
static void xml_outputAGSline(const GthAGS *ags, unsigned long agsnum,
                              unsigned int indentlevel, GtFile *outfp)
{
  const unsigned int coords_level = indentlevel + 1,
                     exon_level = indentlevel + 2;
  unsigned long exon_idx;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<AGS_line AGS_serial=\"%lu\">\n",
                  agsnum + OUTPUTOFFSET);

  gth_indent(outfp, coords_level);
  gt_file_xprintf(outfp, "<exon_coordinates>\n");

  for (exon_idx = 0; exon_idx < gth_ags_num_of_exons(ags); exon_idx++) {
    GthExonAGS *current = gth_ags_get_exon(ags, exon_idx);
    gth_indent(outfp, exon_level);
    gt_file_xprintf(outfp, "<exon e_start=\"%lu\" e_stop=\"%lu\"/>\n",
                    SHOWGENPOSAGS(current->range.start),
                    SHOWGENPOSAGS(current->range.end));
  }

  gth_indent(outfp, coords_level);
  gt_file_xprintf(outfp, "</exon_coordinates>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</AGS_line>\n");
}
/* Writes the <SCR_line> element of <ags> to <outfp>.
   For every exon except the last one an <exon-intron> element is written,
   carrying the donor/acceptor site probabilities stored at the same index in
   ags->splicesiteprobs plus the score of the exon. The last exon is reported
   by a final <exon-only> element which carries only its score.
   If the exon array is empty, an empty <SCR_line> element is written. */
static void xml_outputSCRline(const GthAGS *ags, unsigned int indentlevel,
                              GtFile *outfp)
{
  const unsigned long num_of_exons = gt_array_size(ags->exons);
  GthSpliceSiteProb *splicesiteprob;
  GthExonAGS *exon;
  unsigned long i;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<SCR_line>\n");
  indentlevel++;

  /* "i + 1 < n" instead of "i < n - 1": the count is unsigned, so the latter
     wraps around for n == 0 and the loop would run off the arrays */
  for (i = 0; i + 1 < num_of_exons; i++) {
    splicesiteprob = (GthSpliceSiteProb*) gt_array_get(ags->splicesiteprobs,
                                                       i);
    exon = (GthExonAGS*) gt_array_get(ags->exons, i);
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon-intron don_prob=\"%.3f\" "
                    "acc_prob=\"%.3f\" e_score=\"%.3f\"/>\n",
                    splicesiteprob->donorsiteprob,
                    splicesiteprob->acceptorsiteprob,
                    exon->score);
  }

  /* the last exon has no following intron; here i == num_of_exons - 1 */
  if (num_of_exons > 0) {
    exon = (GthExonAGS*) gt_array_get(ags->exons, i);
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exon-only e_score=\"%.3f\"/>\n", exon->score);
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</SCR_line>\n");
}
/* Writes the <AGS_information> element for <ags> to out->outfp. Its children
   are produced, in this order, by the AGS line, SCR line, exon/intron lines,
   PGS lines and 3-phase translation routines, all one level below
   <indentlevel>. */
static void xml_show_ags(const GthAGS *ags, unsigned long pglnum,
                         unsigned long agsnum, unsigned long translationtable,
                         GthInput *input, unsigned int indentlevel,
                         GthOutput *out)
{
  const unsigned int child_level = indentlevel + 1;

  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "<AGS_information>\n");

  xml_outputAGSline(ags, agsnum, child_level, out->outfp);
  xml_outputSCRline(ags, child_level, out->outfp);
  xml_output_exon_intron_lines(ags, child_level, out->outfp);
  xml_outputPGSlines(ags->alignments, child_level, out->outfp);
  gt_outputtranslationandorf(pglnum, ags, agsnum, translationtable, input,
                             child_level, out);

  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "</AGS_information>\n");
}
/* Writes the <predicted_gene_location> element for <pgl> to out->outfp:
   a <PGL_line> element (serial number <pglnum> + OUTPUTOFFSET, strand and the
   borders of pgl->maxrange passed through SHOWGENPOS()) followed by one
   <AGS_information> element per AGS of the PGL. */
static void xml_show_pgl(GthPGL *pgl, unsigned long pglnum,
                         unsigned long translationtable, GthInput *input,
                         unsigned int indentlevel, GthOutput *out)
{
  const unsigned int child_level = indentlevel + 1;
  unsigned long agsnum;

  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "<predicted_gene_location>\n");

  gth_indent(out->outfp, child_level);
  gt_file_xprintf(out->outfp,
                  "<PGL_line PGL_serial=\"%lu\" "
                  "PGL_strand=\"%c\" PGL_start=\"%lu\" PGL_stop=\"%lu\"/>\n",
                  pglnum + OUTPUTOFFSET,
                  SHOWSTRAND(gth_pgl_is_forward(pgl)),
                  SHOWGENPOS(gth_pgl_is_forward(pgl),
                             gth_pgl_total_length(pgl),
                             gth_pgl_genomic_offset(pgl),
                             pgl->maxrange.start),
                  SHOWGENPOS(gth_pgl_is_forward(pgl),
                             gth_pgl_total_length(pgl),
                             gth_pgl_genomic_offset(pgl),
                             pgl->maxrange.end));

  for (agsnum = 0; agsnum < gth_pgl_num_of_ags(pgl); agsnum++) {
    xml_show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum,
                 translationtable, input, child_level, out);
  }

  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "</predicted_gene_location>\n");
}
/* Prints the "classic" GeneSeqer2 MATCH line as a <MATCH_line> element.
   The reference strand attribute is only written for DNA references. The
   element contains the total alignment score, the cumulative length of the
   scored exons and the coverage (percentage plus the coverage character). */
static void xml_showmatchline(GthSA *sa, unsigned int indentlevel,
                              GtFile *outfp)
{
  const unsigned int child_level = indentlevel + 1;

  /* opening tag: genomic part first, then the reference part */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<MATCH_line gen_id=\"%s\" gen_strand=\"%c\" ",
                  gth_sa_gen_id(sa), gth_sa_gen_strand_char(sa));
  if (gth_sa_alphatype(sa) != DNA_ALPHA)
    gt_file_xprintf(outfp, "ref_id=\"%s\">\n", gth_sa_ref_id(sa));
  else {
    gt_file_xprintf(outfp, "ref_id=\"%s\" ref_strand=\"%c\">\n",
                    gth_sa_ref_id(sa), gth_sa_ref_strand_char(sa));
  }

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp,
                  "<total_alignment_score>%.3f</total_alignment_score>\n",
                  gth_sa_score(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp,
                  "<cumulative_length_of_scored_exons>"
                  "%lu</cumulative_length_of_scored_exons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  /* <coverage .../>: the high_type character is written on its own */
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<coverage percentage=\"%.3f\" high_type=\"",
                  gth_sa_coverage(sa));
  gt_file_xfputc(gth_sa_coverage_char(sa), outfp);
  gt_file_xprintf(outfp, "\"/>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</MATCH_line>\n");
}
/* Shows <pgl> on out->outfp, either as XML (out->xmlout set) or as plain
   text. In XML mode a <predicted_gene_location> element with a <PGL_line>
   child is written and the AGSs are shown one level deeper; in text mode a
   single "PGL ..." line is written (optionally naming the genomic template)
   and the AGSs are shown at <indentlevel>. GFF3 output must not be
   requested. */
static void show_pgl(GthPGL *pgl, GtUword pglnum, GtUword translationtable,
                     GthInput *input, unsigned int indentlevel,
                     GthOutput *out)
{
  GtFile *outfp = out->outfp;
  GtUword agsnum;

  gt_assert(!out->gff3out);

  if (!out->xmlout) {
    /* plain text header line */
    gt_file_xprintf(outfp,
                    "PGL %3" GT_WUS " (%c strand): " GT_WU " " GT_WU,
                    pglnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_pgl_is_forward(pgl)),
                    SHOWGENPOS(gth_pgl_is_forward(pgl),
                               gth_pgl_total_length(pgl),
                               gth_pgl_genomic_offset(pgl),
                               pgl->maxrange.start),
                    SHOWGENPOS(gth_pgl_is_forward(pgl),
                               gth_pgl_total_length(pgl),
                               gth_pgl_genomic_offset(pgl),
                               pgl->maxrange.end));
    if (out->pglgentemplate) {
      gt_file_xprintf(outfp, " (genomic template '%s')",
                      gth_pgl_gen_id(pgl));
    }
    gt_file_xfputc('\n', outfp);
  }
  else {
    /* XML header: opening element plus <PGL_line> */
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<predicted_gene_location>\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<PGL_line PGL_serial=\"" GT_WU "\" PGL_strand=\"%c\" "
                    "PGL_start=\"" GT_WU "\" PGL_stop=\"" GT_WU "\"/>\n",
                    pglnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_pgl_is_forward(pgl)),
                    SHOWGENPOS(gth_pgl_is_forward(pgl),
                               gth_pgl_total_length(pgl),
                               gth_pgl_genomic_offset(pgl),
                               pgl->maxrange.start),
                    SHOWGENPOS(gth_pgl_is_forward(pgl),
                               gth_pgl_total_length(pgl),
                               gth_pgl_genomic_offset(pgl),
                               pgl->maxrange.end));
  }

  for (agsnum = 0; agsnum < gt_array_size(pgl->assemblies); agsnum++) {
    show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum, translationtable,
             input, indentlevel, out);
  }

  if (out->xmlout) {
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</predicted_gene_location>\n");
  }
}
/* Trailer of the XML final spliced alignment visitor: reports the number of
   spliced alignments (<num_of_sas>) in a <total_number_ESTs_reported>
   element, one level below the visitor's indent level, and then closes the
   <alignment_module> element at the visitor's own indent level, i.e. the
   level at which xml_final_sa_visitor_preface() writes the opening tag.
   The visitor's indent level is unchanged on return. */
static void xml_final_sa_visitor_trailer(GthSAVisitor *sa_visitor,
                                         unsigned long num_of_sas)
{
  GthXMLFinalSAVisitor *visitor = xml_final_sa_visitor_cast(sa_visitor);

  visitor->indentlevel++;
  gth_indent(visitor->outfp, visitor->indentlevel);
  gt_file_xprintf(visitor->outfp, "<total_number_ESTs_reported>%lu"
                  "</total_number_ESTs_reported>\n", num_of_sas);

  /* go back to the level of the opening tag before closing the element */
  visitor->indentlevel--;
  gth_indent(visitor->outfp, visitor->indentlevel);
  gt_file_xprintf(visitor->outfp, "</alignment_module>\n");
}
/* Writes the opening <reference ...> tag for the reference sequence of <sa>
   to <outfp>: file name, sequence id, the strand (DNA references only) and
   the description, which is echoed by the input object. The closing
   </reference> tag is not written here. */
static void xml_showgthreferenceinformation(GthSA *sa, GthInput *input,
                                            unsigned int indentlevel,
                                            GtFile *outfp)
{
  gt_assert(gth_sa_ref_file_num(sa) != GT_UNDEF_ULONG);

  gth_indent(outfp, indentlevel);

  /* attributes up to (and including) the opening quote of ref_description */
  if (gth_sa_alphatype(sa) == DNA_ALPHA) {
    gt_file_xprintf(outfp,
                    "<reference ref_file=\"%s\" "
                    "ref_id=\"%s\" ref_strand=\"%c\" ref_description=\"",
                    gth_input_get_reference_filename(input,
                                                     gth_sa_ref_file_num(sa)),
                    gth_sa_ref_id(sa),
                    gth_sa_ref_strand_char(sa));
  }
  else if (gth_sa_alphatype(sa) == PROTEIN_ALPHA) {
    gt_file_xprintf(outfp,
                    "<reference ref_file=\"%s\" "
                    "ref_id=\"%s\" ref_description=\"",
                    gth_input_get_reference_filename(input,
                                                     gth_sa_ref_file_num(sa)),
                    gth_sa_ref_id(sa));
  }
  else
    gt_assert(0);

  gth_input_echo_reference_description(input, gth_sa_ref_file_num(sa),
                                       gth_sa_ref_seq_num(sa), outfp);
  gt_file_xprintf(outfp, "\">\n");
}
/* Preface of the XML PGL visitor: writes the opening <PGL_module> tag
   (including its namespace) at the visitor's indent level. The number of
   PGLs is not used. */
static void xml_pgl_visitor_preface(GthPGLVisitor *pgl_visitor,
                                    GT_UNUSED unsigned long num_of_pgls)
{
  GthXMLPGLVisitor *visitor = xml_pgl_visitor_cast(pgl_visitor);
  GtFile *outfp = visitor->out->outfp;

  gth_indent(outfp, visitor->indentlevel);
  gt_file_xprintf(outfp,
                  "<PGL_module "
                  "xmlns=\"http://www.genomethreader.org/GTH_output/"
                  "PGL_module/\">\n");
}
/* Writes the <polyAtailpos> element of <sa> to <outfp>, containing the
   values of gth_sa_polyAtail_start() and gth_sa_polyAtail_stop() as
   <polyAstart> and <polyAstop>. */
static void showpolyAtailpos(GthSA *sa, unsigned int indentlevel,
                             GtFile *outfp)
{
  const unsigned int child_level = indentlevel + 1;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<polyAtailpos>\n");

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<polyAstart>" GT_WU "</polyAstart>\n",
                  gth_sa_polyAtail_start(sa));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<polyAstop>" GT_WU "</polyAstop>\n",
                  gth_sa_polyAtail_stop(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</polyAtailpos>\n");
}
/* Writes the <genomicfile> element of <sa> to <outfp>: the name of the
   genomic file the alignment refers to and, as file hash, the constant
   GTH_UNDEFINED_HASH. */
static void showgenomicfilename(GthSA *sa, GthInput *input,
                                unsigned int indentlevel, GtFile *outfp)
{
  const unsigned int child_level = indentlevel + 1;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genomicfile>\n");

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<genomicfilename>%s</genomicfilename>\n",
                  gth_input_get_genomic_filename(input,
                                                 gth_sa_gen_file_num(sa)));

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<genomicfilehash>%s</genomicfilehash>\n",
                  GTH_UNDEFINED_HASH);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</genomicfile>\n");
}
/* Writes the <supporting_evidence> element to <outfp>. <alignments> is read
   as an array of GthSA pointers; for each of them a <PGS_line> element is
   written which lists the genomic exon borders and names the reference
   (<referenceDNA> with strand for DNA references, <referenceProtein>
   otherwise). */
static void xml_outputPGSlines(GtArray *alignments, unsigned int indentlevel,
                               GtFile *outfp)
{
  const unsigned int line_level = indentlevel + 1,
                     detail_level = indentlevel + 2,
                     exon_level = indentlevel + 3;
  unsigned long sa_idx, exon_idx;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<supporting_evidence "
                  "xmlns=\"http://www.genomethreader.org/GTH_output/"
                  "PGL_module/predicted_gene_location/AGS_information/"
                  "supporting_evidence/\">\n");

  for (sa_idx = 0; sa_idx < gt_array_size(alignments); sa_idx++) {
    GthSA *sa = *(GthSA**) gt_array_get(alignments, sa_idx);

    gth_indent(outfp, line_level);
    gt_file_xprintf(outfp, "<PGS_line>\n");

    /* genomic exon borders */
    gth_indent(outfp, detail_level);
    gt_file_xprintf(outfp, "<gDNA_exon_coordinates>\n");
    for (exon_idx = 0; exon_idx < gth_sa_num_of_exons(sa); exon_idx++) {
      gth_indent(outfp, exon_level);
      gt_file_xprintf(outfp, "<exon start=\"%lu\" stop=\"%lu\"/>\n",
                      gth_sa_left_genomic_exon_border(sa, exon_idx),
                      gth_sa_right_genomic_exon_border(sa, exon_idx));
    }
    gth_indent(outfp, detail_level);
    gt_file_xprintf(outfp, "</gDNA_exon_coordinates>\n");

    /* reference the evidence stems from */
    gth_indent(outfp, detail_level);
    if (gth_sa_alphatype(sa) != DNA_ALPHA) {
      gt_file_xprintf(outfp, "<referenceProtein id=\"%s\"/>\n",
                      gth_sa_ref_id(sa));
    }
    else {
      gt_file_xprintf(outfp, "<referenceDNA id=\"%s\" strand=\"%c\"/>\n",
                      gth_sa_ref_id(sa), gth_sa_ref_strand_char(sa));
    }

    gth_indent(outfp, line_level);
    gt_file_xprintf(outfp, "</PGS_line>\n");
  }

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</supporting_evidence>\n");
}
/* Prints a PPA line, i.e., a <PPA_line> element with the start and stop
   position of the poly-A tail (each shifted by OUTPUTOFFSET). Nothing is
   printed if both gth_sa_polyAtail_start() and gth_sa_polyAtail_stop()
   return 0. */
static void xml_showppaline(GthSA *sa, unsigned int indentlevel,
                            GtFile *outfp)
{
  if (!gth_sa_polyAtail_start(sa) && !gth_sa_polyAtail_stop(sa))
    return;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<PPA_line polyA_start=\"%lu\" polyA_stop=\"%lu\"/>\n",
                  gth_sa_polyAtail_start(sa) + OUTPUTOFFSET,
                  gth_sa_polyAtail_stop(sa) + OUTPUTOFFSET);
}
/* Writes the <overall_reference_type> element to <outfp>. Its content is
   "ESTcDNA", "Protein" or "Mixed" for DNA_ALPHA, PROTEIN_ALPHA and
   MIXED_ALPHA, respectively; any other value trips an assertion. */
static void show_overall_reference_type(GthAlphatype overallalphatype,
                                        unsigned int indentlevel,
                                        GtFile *outfp)
{
  const char *type_name = NULL;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<overall_reference_type>");

  switch (overallalphatype) {
    case DNA_ALPHA:     type_name = "ESTcDNA"; break;
    case PROTEIN_ALPHA: type_name = "Protein"; break;
    case MIXED_ALPHA:   type_name = "Mixed";   break;
    default: gt_assert(0);
  }
  if (type_name)
    gt_file_xprintf(outfp, "%s", type_name);

  gt_file_xprintf(outfp, "</overall_reference_type>\n");
}
/* Prints the "classic" GeneSeqer2 PGS line as a <PGS_line> element: the
   genomic sequence (<gDNA>), the reference (<rDNA> with strand for DNA
   references, <rProt> otherwise) and the genomic borders of all exons of
   <sa> inside <gDNA_exon_coordinates>. */
static void xml_showpgsline(GthSA *sa, unsigned int indentlevel,
                            GtFile *outfp)
{
  const unsigned int child_level = indentlevel + 1,
                     exon_level = indentlevel + 2;
  unsigned long exon_idx, exon_count;

  gt_assert(sa);
  exon_count = gth_sa_num_of_exons(sa);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<PGS_line>\n");

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<gDNA gen_id=\"%s\" gen_strand=\"%c\"/>\n",
                  gth_sa_gen_id(sa), gth_sa_gen_strand_char(sa));

  gth_indent(outfp, child_level);
  if (gth_sa_alphatype(sa) != DNA_ALPHA)
    gt_file_xprintf(outfp, "<rProt rProt_id=\"%s\"/>\n", gth_sa_ref_id(sa));
  else {
    gt_file_xprintf(outfp, "<rDNA rDNA_id=\"%s\" rDNA_strand=\"%c\"/>\n",
                    gth_sa_ref_id(sa), gth_sa_ref_strand_char(sa));
  }

  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "<gDNA_exon_coordinates>\n");
  for (exon_idx = 0; exon_idx < exon_count; exon_idx++) {
    gth_indent(outfp, exon_level);
    gt_file_xprintf(outfp, "<exon e_start=\"%lu\" e_stop=\"%lu\"/>\n",
                    gth_sa_left_genomic_exon_border(sa, exon_idx),
                    gth_sa_right_genomic_exon_border(sa, exon_idx));
  }
  gth_indent(outfp, child_level);
  gt_file_xprintf(outfp, "</gDNA_exon_coordinates>\n");

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</PGS_line>\n");
}
/* Writes one <exoninfo> element per exon of <sa> to <outfp>. Each element
   holds the left/right genomic and reference exon borders as stored in the
   exon info, plus the exon score printed with PRECISION decimals. */
static void showexons(GthSA *sa, unsigned int indentlevel, GtFile *outfp)
{
  const unsigned int field_level = indentlevel + 1;
  GtUword exon_idx;

  for (exon_idx = 0; exon_idx < gth_sa_num_of_exons(sa); exon_idx++) {
    Exoninfo *info = gth_sa_get_exon(sa, exon_idx);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<exoninfo>\n");

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp,
                    "<leftgenomicexonborder>" GT_WU
                    "</leftgenomicexonborder>\n",
                    info->leftgenomicexonborder);

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp,
                    "<rightgenomicexonborder>" GT_WU
                    "</rightgenomicexonborder>\n",
                    info->rightgenomicexonborder);

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp,
                    "<leftreferenceexonborder>" GT_WU
                    "</leftreferenceexonborder>\n",
                    info->leftreferenceexonborder);

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp,
                    "<rightreferenceexonborder>" GT_WU
                    "</rightreferenceexonborder>\n",
                    info->rightreferenceexonborder);

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp, "<exonscore>%.*f</exonscore>\n", PRECISION,
                    info->exonscore);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</exoninfo>\n");
  }
}
/* Writes one <introninfo> element per intron of <sa> to <outfp>. Each
   element holds the donor/acceptor site probabilities and the donor/acceptor
   site scores, all printed with PRECISION decimals. If <dnaalpha> is false,
   UNDEFINED_SPLICE_SITE_SCORE is printed in place of both scores. */
static void showintrons(GthSA *sa, bool dnaalpha, unsigned int indentlevel,
                        GtFile *outfp)
{
  const unsigned int field_level = indentlevel + 1;
  GtUword intron_idx;

  for (intron_idx = 0; intron_idx < gth_sa_num_of_introns(sa); intron_idx++) {
    Introninfo *info = gth_sa_get_intron(sa, intron_idx);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<introninfo>\n");

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp,
                    "<donorsiteprobability>%.*f</donorsiteprobability>\n",
                    PRECISION, info->donorsiteprobability);

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp,
                    "<acceptorsiteprobability>%.*f"
                    "</acceptorsiteprobability>\n",
                    PRECISION, info->acceptorsiteprobability);

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp, "<donorsitescore>%.*f</donorsitescore>\n",
                    PRECISION,
                    dnaalpha ? info->donorsitescore
                             : UNDEFINED_SPLICE_SITE_SCORE);

    gth_indent(outfp, field_level);
    gt_file_xprintf(outfp, "<acceptorsitescore>%.*f</acceptorsitescore>\n",
                    PRECISION,
                    dnaalpha ? info->acceptorsitescore
                             : UNDEFINED_SPLICE_SITE_SCORE);

    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</introninfo>\n");
  }
}
/* Writes <sa> as an intermediate-format <spliced_alignment> element to
   <outfp>. The element lists, in a fixed order, the reference alphabet type,
   the edit operations of the backtrace path, lengths/offsets/DP positions,
   file and sequence identification, strand flags, alignment cutoffs, exons,
   introns, the poly-A tail position and finally score and coverage values.
   Intron splice site scores are only passed on for DNA references (see the
   dnaalpha flag handed to showintrons()). */
static void xml_inter_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  const unsigned int inner = indentlevel + 1;
  const GthAlphatype alphatype = gth_sa_alphatype(sa);
  bool dnaalpha = true;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<spliced_alignment "
                  "xmlns=\"http://www.GenomeThreader.org/SplicedAlignment/"
                  "spliced_alignment/\">\n");

  /* reference alphabet type */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referencealphatype>");
  if (alphatype == DNA_ALPHA)
    gt_file_xprintf(outfp, "DNA_ALPHA");
  else if (alphatype == PROTEIN_ALPHA) {
    gt_file_xprintf(outfp, "PROTEIN_ALPHA");
    dnaalpha = false;
  }
  else
    gt_assert(0);
  gt_file_xprintf(outfp, "</referencealphatype>\n");

  /* edit operations */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<editoperations>\n");
  gth_backtrace_path_show_complete(gth_sa_backtrace_path(sa), true,
                                   inner + 1, outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "</editoperations>\n");

  /* lengths, offsets and DP positions */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<indelcount>" GT_WU "</indelcount>\n",
                  gth_sa_indelcount(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomiclengthDP>" GT_WU "</genomiclengthDP>\n",
                  gth_sa_gen_dp_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<genomiclengthtotal>" GT_WU "</genomiclengthtotal>\n",
                  gth_sa_gen_total_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicoffset>" GT_WU "</genomicoffset>\n",
                  gth_sa_gen_offset(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referencelength>" GT_WU "</referencelength>\n",
                  gth_sa_ref_total_length(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<dpstartpos>" GT_WU "</dpstartpos>\n",
                  gth_sa_gen_dp_start(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<dpendpos>" GT_WU "</dpendpos>\n",
                  gth_sa_gen_dp_end(sa));

  /* files and sequence numbers */
  showgenomicfilename(sa, input, inner, outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicseqnum>" GT_WU "</genomicseqnum>\n",
                  gth_sa_gen_seq_num(sa));
  showreferencefilename(sa, input, inner, outfp);
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referenceseqnum>" GT_WU "</referenceseqnum>\n",
                  gth_sa_ref_seq_num(sa));

  /* ids and strands */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<genomicid>%s</genomicid>\n", gth_sa_gen_id(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<referenceid>%s</referenceid>\n",
                  gth_sa_ref_id(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<genomicstrandisforward>%s</genomicstrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_gen_strand_forward(sa)));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<referencestrandisforward>%s"
                  "</referencestrandisforward>\n",
                  GTH_SHOWBOOL(gth_sa_ref_strand_forward(sa)));

  /* structure of the alignment */
  showalignmentcutoffs(sa, inner, outfp);
  showexons(sa, inner, outfp);
  showintrons(sa, dnaalpha, inner, outfp);
  showpolyAtailpos(sa, inner, outfp);

  /* score and coverage */
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<alignmentscore>%.*f</alignmentscore>\n",
                  PRECISION, gth_sa_score(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp, "<coverage>%.*f</coverage>\n", PRECISION,
                  gth_sa_coverage(sa));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<coverageofgenomicsegmentishighest>%s"
                  "</coverageofgenomicsegmentishighest>\n",
                  GTH_SHOWBOOL(gth_sa_genomic_cov_is_highest(sa)));
  gth_indent(outfp, inner);
  gt_file_xprintf(outfp,
                  "<cumulativelengthofscoredexons>" GT_WU
                  "</cumulativelengthofscoredexons>\n",
                  gth_sa_cumlen_scored_exons(sa));

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");
}
/* Writes the XML <header> element of a run to call_info->out->outfp.
   The namespace of the element depends on call_info->intermediate. The
   header contains the program source information (<gth_version>, GT_BUILT,
   <timestring>), the genomic and reference file names, the splice site
   parameters, a <parameters> element (BSSM file, score matrix file, search
   mode and the program arguments <args> as an XML comment) and the overall
   reference type. At least one genomic and one reference file must be
   defined in <input>. */
static void show_xml_run_header(GthCallInfo *call_info, GthInput *input,
                                const char *timestring,
                                const char *gth_version,
                                unsigned int indentlevel, const char **args)
{
  GtFile *outfp = call_info->out->outfp;
  GtUword i;

  gth_indent(outfp, indentlevel);
  if (call_info->intermediate) {
    gt_file_xprintf(outfp,
                    "<header xmlns=\"http://www.GenomeThreader.org/"
                    "SplicedAlignment/header/\">\n");
  }
  else {
    gt_file_xprintf(outfp,
                    "<header xmlns=\"http://www.genomethreader.org/GTH_output/"
                    "header/\">\n");
  }

  /* at least one genomic file defined */
  gt_assert(gth_input_num_of_gen_files(input));
  /* at least one reference file defined */
  gt_assert(gth_input_num_of_ref_files(input));

  /* show a readable version of GthCallInfo. That is, it is shown with which
     parameters the program was called */
  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<source program=\"GenomeThreader\" version=\"%s\" "
                  "build_date=\"%s\" run_date=\"%s\"/>\n",
                  gth_version, GT_BUILT, timestring);

  /* show genomic file names */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<gDNA_template_files>\n");
  indentlevel++;
  for (i = 0; i < gth_input_num_of_gen_files(input); i++) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<temp_name>%s</temp_name>\n",
                    gth_input_get_genomic_filename(input, i));
  }
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</gDNA_template_files>\n");

  /* show reference file names */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<reference_files>\n");
  indentlevel++;
  for (i = 0; i < gth_input_num_of_ref_files(input); i++) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<file ref_name=\"%s\" type=\"%s\"/>\n",
                    gth_input_get_reference_filename(input, i),
                    gth_input_get_alphatype(input, i) == DNA_ALPHA
                    ? "ESTcDNA"
                    : "Protein");
  }
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</reference_files>\n");

  /* show splice site model and species */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<splice_site_parameters parameter_type=\"%s\" "
                  "species=\"%s\"/>\n",
                  SPLICE_SITE_MODEL_NAME,
                  call_info->speciesnum == NUMOFSPECIES
                  ? GENERIC_SPECIES_NAME
                  : speciestab[call_info->speciesnum]);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<parameters>\n");
  indentlevel++;

  /* output name of BSSM file */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<parameter name=\"bssmfile\" value=\"%s\"/>\n",
                  gth_input_bssmfilename(input));

  /* output name of scorematrix */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<parameter name=\"scorematrixfile\" value=\"%s\"/>\n",
                  gt_str_get(call_info->scorematrixfile));

  /* output searchmode (the value used to end in a stray, unbalanced ')') */
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<parameter name=\"searchmode\" "
                  "value=\"forward=%s,reverse=%s\"/>\n",
                  GTH_SHOWBOOL(gth_input_forward(input)),
                  GTH_SHOWBOOL(gth_input_reverse(input)));

  /* output arguments as comment (written without indentation) */
  gt_file_xprintf(outfp, "<!--\n%c Arguments: ", COMMENTCHAR);
  gt_cstr_array_show_genfile(args, outfp);
  gt_file_xprintf(outfp, "-->\n");

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</parameters>\n");

  show_overall_reference_type(gth_input_overall_alphatype(input), indentlevel,
                              outfp);

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</header>\n");
}
/* Trailer of the XML PGL visitor: writes the closing </PGL_module> tag at
   the visitor's indent level. */
static void xml_pgl_visitor_trailer(GthPGLVisitor *pgl_visitor)
{
  GthXMLPGLVisitor *visitor = xml_pgl_visitor_cast(pgl_visitor);
  GtFile *outfp = visitor->out->outfp;

  gth_indent(outfp, visitor->indentlevel);
  gt_file_xprintf(outfp, "</PGL_module>\n");
}
/* Writes the <exon-intron_info> element of <ags> to <outfp>. The exons are
   visited in array order; before every exon except the first one, the intron
   separating it from its predecessor is written as an <intron> element
   (serial number, splice site probabilities, borders and length). Each exon
   is written as an <exon> element (serial number, score, borders and
   length). Intron borders are derived from the neighbouring exon borders;
   shown positions are passed through SHOWGENPOSAGS(). */
static void xml_output_exon_intron_lines(const GthAGS *ags,
                                         unsigned int indentlevel,
                                         GtFile *outfp)
{
  const unsigned int item_level = indentlevel + 1,
                     boundary_level = indentlevel + 2;
  /* first position after the previous exon, undefined before the first one */
  unsigned long intron_left = GT_UNDEF_ULONG;
  unsigned long i;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<exon-intron_info "
                  "xmlns=\"http://www.genomethreader.org/GTH_output/"
                  "PGL_module/predicted_gene_location/AGS_information/"
                  "exon-intron_info/\">\n");

  for (i = 0; i < gt_array_size(ags->exons); i++) {
    GthExonAGS *exon = (GthExonAGS*) gt_array_get(ags->exons, i);
    unsigned long exon_left = exon->range.start,
                  exon_right = exon->range.end,
                  exon_len = exon_right - exon_left + 1;
    GthDbl exon_score = exon->score;

    if (i > 0) {
      /* the intron between exon i-1 and exon i */
      GthSpliceSiteProb *probs =
        (GthSpliceSiteProb*) gt_array_get(ags->splicesiteprobs, i-1);
      unsigned long intron_right = exon_left - 1,
                    intron_len = intron_right - intron_left + 1;
      GthFlt don_prob = probs->donorsiteprob,
             acc_prob = probs->acceptorsiteprob;

      gth_indent(outfp, item_level);
      gt_file_xprintf(outfp,
                      "<intron i_serial=\"%lu\" "
                      "don_prob=\"%.3f\" acc_prob=\"%.3f\">\n",
                      i - 1 + OUTPUTOFFSET, don_prob, acc_prob);
      gth_indent(outfp, boundary_level);
      gt_file_xprintf(outfp,
                      "<gDNA_intron_boundary i_start=\"%lu\" "
                      "i_stop=\"%lu\" i_length=\"%lu\"/>\n",
                      SHOWGENPOSAGS(intron_left),
                      SHOWGENPOSAGS(intron_right), intron_len);
      gth_indent(outfp, item_level);
      gt_file_xprintf(outfp, "</intron>\n");
    }
    intron_left = exon_right + 1;

    /* the exon itself */
    gth_indent(outfp, item_level);
    gt_file_xprintf(outfp, "<exon e_serial=\"%lu\" e_score=\"%.3f\">\n",
                    i + OUTPUTOFFSET, exon_score);
    gth_indent(outfp, boundary_level);
    gt_file_xprintf(outfp,
                    "<gDNA_exon_boundary e_start=\"%lu\" "
                    "e_stop=\"%lu\" e_length=\"%lu\"/>\n",
                    SHOWGENPOSAGS(exon_left), SHOWGENPOSAGS(exon_right),
                    exon_len);
    gth_indent(outfp, item_level);
    gt_file_xprintf(outfp, "</exon>\n");
  }

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</exon-intron_info>\n");
}
/* Outputs the 3-phase translation and the ORFs of <ags> to out->outfp.
   The exon ranges of the AGS are used to build a spliced sequence from the
   original genomic sequence, which is then translated in all three frames
   using translation table <translationtable>. The result is handed to
   showtranslation() and gthshowORFs(). In XML mode (out->xmlout) the output
   is wrapped in a <three_phase_translation> element with a <description>
   child; otherwise a plain text heading is written. All objects created here
   are deleted before returning. */
void gt_outputtranslationandorf(unsigned long pglnum, const GthAGS *ags,
                                unsigned long agsnum,
                                unsigned long translationtable,
                                GthInput *input, unsigned int indentlevel,
                                GthOutput *out)
{
  GtFile *outfp = out->outfp;
  const unsigned char *genomic_seq;
  GthSplicedSeq *spliced_seq;
  GtArray *exon_ranges;
  GtStr *frame[3];
  GtCodonIterator *codon_iter;
  GtTransTable *table;
  GtTranslator *translator;
  GtTranslatorStatus status;
  unsigned long exon_idx;
  unsigned int cur_frame, f;
  char amino;

  /* output header */
  if (!out->xmlout) {
    gt_file_xprintf(outfp, "3-phase translation of AGS-%lu (%cstrand):\n\n",
                    agsnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_ags_is_forward(ags)));
  }
  else {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<three_phase_translation "
                    "xmlns=\"http://www.genomethreader.org/GTH_output/"
                    "PGL_module/predicted_gene_location/AGS_information/"
                    "three_phase_translation/\">\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<description PGL_serial=\"%lu\" AGS_serial=\"%lu\" "
                    "gDNA_strand=\"%c\"/>\n",
                    pglnum + OUTPUTOFFSET, agsnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_ags_is_forward(ags)));
  }

  /* collect the exon ranges */
  exon_ranges = gt_array_new(sizeof (GtRange));
  for (exon_idx = 0; exon_idx < gt_array_size(ags->exons); exon_idx++) {
    GthExonAGS *exon = (GthExonAGS*) gt_array_get(ags->exons, exon_idx);
    gt_array_add(exon_ranges, exon->range);
  }

  /* get genomic sequence and splice it */
  genomic_seq = gth_input_original_genomic_sequence(input,
                                                    gth_ags_filenum(ags),
                                                    gth_ags_is_forward(ags));
  spliced_seq = gth_spliced_seq_new(genomic_seq, exon_ranges);

  for (f = 0; f < 3; f++)
    frame[f] = gt_str_new();

  /* prepare for translation */
  codon_iter = gt_codon_iterator_simple_new((const char*)
                                            spliced_seq->splicedseq,
                                            spliced_seq->splicedseqlen,
                                            NULL);
  gt_assert(codon_iter);
  table = gt_trans_table_new(translationtable, NULL);
  gt_assert(table);

  /* translate the template in all three frames; every translated character
     is appended to the string of the frame reported by the translator */
  translator = gt_translator_new_with_table(table, codon_iter);
  for (status = gt_translator_next(translator, &amino, &cur_frame, NULL);
       status == GT_TRANSLATOR_OK;
       status = gt_translator_next(translator, &amino, &cur_frame, NULL)) {
    gt_str_append_char(frame[cur_frame], amino);
  }
  gt_assert(status != GT_TRANSLATOR_ERROR);
  gt_translator_delete(translator);
  gt_trans_table_delete(table);
  gt_codon_iterator_delete(codon_iter);

  /* show the translation */
  showtranslation(spliced_seq,
                  gt_str_get(frame[0]),
                  gt_str_get(frame[1]),
                  gt_str_get(frame[2]),
                  ags->exons,
                  gth_ags_is_forward(ags),
                  gth_ags_total_length(ags),
                  gth_ags_genomic_offset(ags),
                  indentlevel, out);

  /* show the (consolidated) ORFs */
  gthshowORFs(gt_str_get(frame[0]),
              gt_str_get(frame[1]),
              gt_str_get(frame[2]),
              gt_str_length(frame[0]),
              gt_str_length(frame[1]),
              gt_str_length(frame[2]),
              gth_ags_is_forward(ags),
              gth_ags_total_length(ags),
              gth_ags_genomic_offset(ags),
              gt_str_get(ags->gen_id),
              pglnum, agsnum, spliced_seq, indentlevel, out);

  if (out->xmlout) {
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</three_phase_translation>\n");
  }

  /* free */
  gth_spliced_seq_delete(spliced_seq);
  gt_array_delete(exon_ranges);
  for (f = 0; f < 3; f++)
    gt_str_delete(frame[f]);
}
/* Preface of the XML final spliced alignment visitor: writes the opening
   <alignment_module> tag at the visitor's indent level. */
static void xml_final_sa_visitor_preface(GthSAVisitor *sa_visitor)
{
  GthXMLFinalSAVisitor *visitor = xml_final_sa_visitor_cast(sa_visitor);
  GtFile *outfp = visitor->outfp;

  gth_indent(outfp, visitor->indentlevel);
  gt_file_xprintf(outfp, "<alignment_module>\n");
}
/* Writes the first part of the <predicted_gene_structure> element of <sa>
   to <outfp>: the complete <exon-intron_info> element, followed by the PPA
   line (DNA references only), the MATCH line and the PGS line.
   Inside <exon-intron_info> the exons are visited in order; before every
   exon except the first one the preceding intron is written with its
   borders, length and donor/acceptor probabilities (the donor/acceptor
   scores are added for DNA references only). An intron whose length is less
   than or equal to <minintronlength> gets an additional
   <shorter_than_min_intron_len/> tag.
   The <predicted_gene_structure> element is opened here but not closed. */
static void xml_showalignmentheader(GthSA *sa, unsigned long minintronlength,
                                    unsigned int indentlevel, GtFile *outfp)
{
  const unsigned int info_level = indentlevel + 1,     /* <exon-intron_info> */
                     item_level = indentlevel + 2,     /* <intron>, <exon> */
                     boundary_level = indentlevel + 3, /* *_boundary tags */
                     site_level = indentlevel + 4;     /* <donor>, <acceptor> */
  unsigned long i;

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<predicted_gene_structure>\n");
  gth_indent(outfp, info_level);
  gt_file_xprintf(outfp, "<exon-intron_info>\n");

  for (i = 0; i < gth_sa_num_of_exons(sa); i++) {
    Exoninfo *exoninfo = gth_sa_get_exon(sa, i);
    unsigned long ref_left = exoninfo->leftreferenceexonborder,
                  ref_right = exoninfo->rightreferenceexonborder,
                  ref_len = ref_right - ref_left + 1;
    GthDbl exonscore = exoninfo->exonscore;

    if (i > 0) {
      /* the intron preceding exon i */
      Introninfo *introninfo = gth_sa_get_intron(sa, i-1);
      GthFlt don_prob = introninfo->donorsiteprobability,
             acc_prob = introninfo->acceptorsiteprobability;
      GthDbl don_score = introninfo->donorsitescore,
             acc_score = introninfo->acceptorsitescore;

      gth_indent(outfp, item_level);
      gt_file_xprintf(outfp, "<intron i_serial=\"%lu\">\n",
                      i - 1 + OUTPUTOFFSET);

      gth_indent(outfp, boundary_level);
      gt_file_xprintf(outfp,
                      "<gDNA_intron_boundary i_start=\"%lu\" "
                      "i_stop=\"%lu\" i_length=\"%lu\">\n",
                      gth_sa_left_intron_border(sa, i-1),
                      gth_sa_right_intron_border(sa, i-1),
                      gth_sa_intron_length(sa, i-1));

      gth_indent(outfp, site_level);
      gt_file_xprintf(outfp, "<donor d_prob=\"%.3f\"", don_prob);
      if (gth_sa_alphatype(sa) == DNA_ALPHA)
        gt_file_xprintf(outfp, " d_score=\"%.2f\"", don_score);
      gt_file_xprintf(outfp, "/>\n");

      gth_indent(outfp, site_level);
      gt_file_xprintf(outfp, "<acceptor a_prob=\"%.3f\"", acc_prob);
      if (gth_sa_alphatype(sa) == DNA_ALPHA)
        gt_file_xprintf(outfp, " a_score=\"%.2f\"", acc_score);
      gt_file_xprintf(outfp, "/>\n");

      gth_indent(outfp, boundary_level);
      gt_file_xprintf(outfp, "</gDNA_intron_boundary>\n");

      /* if the intron is shorter or equal than the minimal intron length an
         additional tag is shown */
      if (gth_sa_intron_length(sa, i-1) <= minintronlength) {
        gth_indent(outfp, boundary_level);
        gt_file_xprintf(outfp, "<shorter_than_min_intron_len/>\n");
      }

      gth_indent(outfp, item_level);
      gt_file_xprintf(outfp, "</intron>\n");
    }

    /* exon i: genomic and reference borders */
    gth_indent(outfp, item_level);
    gt_file_xprintf(outfp, "<exon e_serial=\"%lu\">\n", i + OUTPUTOFFSET);

    gth_indent(outfp, boundary_level);
    gt_file_xprintf(outfp,
                    "<gDNA_exon_boundary g_start=\"%lu\" "
                    "g_stop=\"%lu\" g_length=\"%lu\"/>\n",
                    gth_sa_left_genomic_exon_border(sa, i),
                    gth_sa_right_genomic_exon_border(sa, i),
                    gth_sa_genomic_exon_length(sa, i));

    gth_indent(outfp, boundary_level);
    gt_file_xprintf(outfp,
                    "<reference_exon_boundary r_type=\"%s\" "
                    "r_start=\"%lu\" r_stop=\"%lu\" r_length=\"%lu\" "
                    "r_score=\"%5.3f\"/>\n",
                    gth_sa_alphastring(sa),
                    ref_left + OUTPUTOFFSET,
                    ref_right + OUTPUTOFFSET,
                    ref_len,
                    exonscore);

    gth_indent(outfp, item_level);
    gt_file_xprintf(outfp, "</exon>\n");
  }

  gth_indent(outfp, info_level);
  gt_file_xprintf(outfp, "</exon-intron_info>\n");

  /* showing PPA line (if an poly-A tail was determined) */
  if (gth_sa_alphatype(sa) == DNA_ALPHA)
    xml_showppaline(sa, info_level, outfp);

  /* showing MATCH line */
  xml_showmatchline(sa, info_level, outfp);

  /* showing PGS line */
  xml_showpgsline(sa, info_level, outfp);
}
/*
  Writes the complete <spliced_alignment> XML element for <sa> to <outfp>.

  The element consists of, in this order:
  - the reference information (xml_showgthreferenceinformation()), the
    reference sequence wrapped in <seq>, and the closing </reference> tag
    (NOTE(review): the helper apparently leaves <reference> open, since the
    closing tag is written here -- confirm against its definition),
  - the genomic information (xml_showgthgenomicinformation()),
  - the alignment header (xml_showalignmentheader()), which opens
    <predicted_gene_structure> without closing it,
  - the <alignment> element containing the alignment lines,
  - the closing </predicted_gene_structure> and </spliced_alignment> tags.

  <sa> and <input> must not be NULL.
  <minintronlength> is passed on to xml_showalignmentheader().
  <translationtable> is passed on to gth_sa_get_alignment_lines().
  <indentlevel> is the indentation of the outermost element.
*/
static void xml_final_show_spliced_alignment(GthSA *sa, GthInput *input,
                                             unsigned long minintronlength,
                                             unsigned long translationtable,
                                             unsigned int indentlevel,
                                             GtFile *outfp)
{
  /* initialized so that the gt_free() calls at the end never see an
     indeterminate pointer, even if a line is left unset by
     gth_sa_get_alignment_lines() */
  unsigned char *first_line = NULL,
                *second_line = NULL,
                *third_line = NULL;
  unsigned long cols;

  gt_assert(sa && input);

  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp,
                  "<spliced_alignment xmlns=\"http://www.genomethreader.org/"
                  "GTH_output/alignment_module/spliced_alignment/\">\n");
  indentlevel++;

  /* reference information, reference sequence and end of <reference> */
  xml_showgthreferenceinformation(sa, input, indentlevel, outfp);
  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<seq>");
  gth_sa_echo_reference_sequence(sa, input, false, outfp);
  gt_file_xprintf(outfp, "</seq>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</reference>\n");

  xml_showgthgenomicinformation(sa, input, indentlevel, outfp);

  /* opens <predicted_gene_structure>, which is closed further below */
  xml_showalignmentheader(sa, minintronlength, indentlevel, outfp);
  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<alignment>\n");

  /* compute the alignment lines */
  cols = gth_sa_get_alignment_lines(sa, &first_line, &second_line,
                                    &third_line, translationtable, input);

  indentlevel++;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "<genome_strand>");
  showconcreteline(first_line, cols, outfp);
  gt_file_xprintf(outfp, "</genome_strand>\n");

  gth_indent(outfp, indentlevel);
  switch (gth_sa_alphatype(sa)) {
    case DNA_ALPHA:
      /* DNA alignments have two lines */
      gt_file_xprintf(outfp, "<mrna_strand>");
      showconcreteline(second_line, cols, outfp);
      gt_file_xprintf(outfp, "</mrna_strand>\n");
      break;
    case PROTEIN_ALPHA:
      /* protein alignments have three lines */
      gt_file_xprintf(outfp, "<genomeProt>");
      showconcreteline(second_line, cols, outfp);
      gt_file_xprintf(outfp, "</genomeProt>\n");
      gth_indent(outfp, indentlevel);
      gt_file_xprintf(outfp, "<queryProt>");
      showconcreteline(third_line, cols, outfp);
      gt_file_xprintf(outfp, "</queryProt>\n");
      break;
    default: gt_assert(0);
  }

  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</alignment>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</predicted_gene_structure>\n");
  indentlevel--;
  gth_indent(outfp, indentlevel);
  gt_file_xprintf(outfp, "</spliced_alignment>\n");

  /* free */
  gt_free(first_line);
  gt_free(second_line);
  gt_free(third_line);
}
/*
  Writes the <cutoffs> XML element of <sa> to <outfp>. It contains a
  <cutoffsstart> and a <cutoffsend> element, each holding the corresponding
  genomic, reference and eop cutoff values of the spliced alignment.
  <indentlevel> is the indentation of the <cutoffs> element itself.
*/
static void showalignmentcutoffs(GthSA *sa, unsigned int indentlevel,
                                 GtFile *outfp)
{
  /* indentation levels of the nested elements */
  const unsigned int cutoffs_level = indentlevel,
                     section_level = indentlevel + 1,
                     value_level = indentlevel + 2;

  gth_indent(outfp, cutoffs_level);
  gt_file_xprintf(outfp, "<cutoffs>\n");

  /* cutoffs at the start of the alignment */
  gth_indent(outfp, section_level);
  gt_file_xprintf(outfp, "<cutoffsstart>\n");
  gth_indent(outfp, value_level);
  gt_file_xprintf(outfp, "<genomiccutoff>"GT_WU"</genomiccutoff>\n",
                  gth_sa_genomiccutoff_start(sa));
  gth_indent(outfp, value_level);
  gt_file_xprintf(outfp, "<referencecutoff>"GT_WU"</referencecutoff>\n",
                  gth_sa_referencecutoff_start(sa));
  gth_indent(outfp, value_level);
  gt_file_xprintf(outfp, "<eopcutoff>"GT_WU"</eopcutoff>\n",
                  gth_sa_eopcutoff_start(sa));
  gth_indent(outfp, section_level);
  gt_file_xprintf(outfp, "</cutoffsstart>\n");

  /* cutoffs at the end of the alignment */
  gth_indent(outfp, section_level);
  gt_file_xprintf(outfp, "<cutoffsend>\n");
  gth_indent(outfp, value_level);
  gt_file_xprintf(outfp, "<genomiccutoff>"GT_WU"</genomiccutoff>\n",
                  gth_sa_genomiccutoff_end(sa));
  gth_indent(outfp, value_level);
  gt_file_xprintf(outfp, "<referencecutoff>"GT_WU"</referencecutoff>\n",
                  gth_sa_referencecutoff_end(sa));
  gth_indent(outfp, value_level);
  gt_file_xprintf(outfp, "<eopcutoff>"GT_WU"</eopcutoff>\n",
                  gth_sa_eopcutoff_end(sa));
  gth_indent(outfp, section_level);
  gt_file_xprintf(outfp, "</cutoffsend>\n");

  gth_indent(outfp, cutoffs_level);
  gt_file_xprintf(outfp, "</cutoffs>\n");
}
static void showtranslation(GthSplicedSeq *splicedseq, char *frame0_in, char *frame1_in, char *frame2_in, GtArray *exons, bool gen_strand_forward, unsigned long gen_total_length, unsigned long gen_offset, unsigned int indentlevel, GthOutput *out) { char *dotline, *template_out, *frame0_out, *frame1_out, *frame2_out; unsigned long i, exonseparatorwidth = strlen(EXONSEPARATORSTRING), outlen = splicedseq->splicedseqlen + ((gt_array_size(exons) - 1) * exonseparatorwidth) + (splicedseq->splicedseqlen / TRANSLATIONLINEWIDTH); GtFile *outfp = out->outfp; dotline = gt_malloc(sizeof (unsigned char) * outlen); template_out = gt_malloc(sizeof (unsigned char) * outlen); frame0_out = gt_malloc(sizeof (unsigned char) * outlen); frame1_out = gt_malloc(sizeof (unsigned char) * outlen); frame2_out = gt_malloc(sizeof (unsigned char) * outlen); createoutputlines(dotline, template_out, frame0_out, frame1_out, frame2_out, (char*) splicedseq->splicedseq, frame0_in, frame1_in, frame2_in, splicedseq, exonseparatorwidth, outlen, out->gs2out); if (out->xmlout) { gth_indent(outfp, indentlevel); gt_file_xprintf(outfp, "<translation>\n"); indentlevel++; gth_indent(outfp, indentlevel); gt_file_xprintf(outfp, "<gDNA_template>"); for (i = 0; i < outlen; i++) { if (template_out[i] != '\n') { gt_file_xfputc(template_out[i], outfp); } } gt_file_xprintf(outfp, "</gDNA_template>\n"); gth_indent(outfp, indentlevel); gt_file_xprintf(outfp, "<first_frame>"); for (i = 0; i < outlen; i++) { if (frame0_out[i] != '\n') { gt_file_xfputc(frame0_out[i], outfp); } } gt_file_xprintf(outfp, "</first_frame>\n"); gth_indent(outfp, indentlevel); gt_file_xprintf(outfp, "<second_frame>"); for (i = 0; i < outlen; i++) { if (frame1_out[i] != '\n') { gt_file_xfputc(frame1_out[i], outfp); } } gt_file_xprintf(outfp, "</second_frame>\n"); gth_indent(outfp, indentlevel); gt_file_xprintf(outfp, "<third_frame>"); for (i = 0; i < outlen; i++) { if (frame2_out[i] != '\n') { gt_file_xfputc(frame2_out[i], outfp); } } 
gt_file_xprintf(outfp, "</third_frame>\n"); indentlevel--; gth_indent(outfp, indentlevel); gt_file_xprintf(outfp, "</translation>\n"); } else { showoutputlines(dotline, template_out, frame0_out, frame1_out, frame2_out, outlen, gen_strand_forward, gen_total_length, gen_offset, splicedseq->positionmapping, out); } gt_free(dotline); gt_free(template_out); gt_free(frame0_out); gt_free(frame1_out); gt_free(frame2_out); }