/*
  Print the header of predicted gene location <pgl> (number <pglnum>) followed
  by all of its alternative gene structures to out->outfp. Depending on
  out->xmlout the header is written as XML elements or as a plain text line.
  Must not be called when GFF3 output is selected.
*/
static void show_pgl(GthPGL *pgl, GtUword pglnum, GtUword translationtable,
                     GthInput *input, unsigned int indentlevel,
                     GthOutput *out)
{
  GtFile *outfp = out->outfp;
  GtUword agsnum, serial, startpos, stoppos;
  char strand;

  gt_assert(!out->gff3out);

  /* values shared by the XML and the plain text header */
  serial = pglnum + OUTPUTOFFSET;
  strand = SHOWSTRAND(gth_pgl_is_forward(pgl));
  startpos = SHOWGENPOS(gth_pgl_is_forward(pgl), gth_pgl_total_length(pgl),
                        gth_pgl_genomic_offset(pgl), pgl->maxrange.start);
  stoppos = SHOWGENPOS(gth_pgl_is_forward(pgl), gth_pgl_total_length(pgl),
                       gth_pgl_genomic_offset(pgl), pgl->maxrange.end);

  if (out->xmlout) {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<predicted_gene_location>\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp,
                    "<PGL_line PGL_serial=\"" GT_WU "\" "
                    "PGL_strand=\"%c\" PGL_start=\"" GT_WU "\" PGL_stop=\""
                    GT_WU "\"/>\n",
                    serial, strand, startpos, stoppos);
  }
  else {
    gt_file_xprintf(outfp,
                    "PGL %3" GT_WUS " (%c strand): " GT_WU " " GT_WU,
                    serial, strand, startpos, stoppos);
    if (out->pglgentemplate) {
      gt_file_xprintf(outfp, " (genomic template '%s')",
                      gth_pgl_gen_id(pgl));
    }
    gt_file_xfputc('\n', outfp);
  }

  /* one entry per alternative gene structure stored in the PGL */
  for (agsnum = 0; agsnum < gt_array_size(pgl->assemblies); agsnum++) {
    show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum, translationtable,
             input, indentlevel, out);
  }

  if (out->xmlout) {
    /* close the element opened above */
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</predicted_gene_location>\n");
  }
}
/*
  Write predicted gene location <pgl> (number <pglnum>) as a
  <predicted_gene_location> XML element to out->outfp: first the <PGL_line>
  summary, then every alternative gene structure via xml_show_ags().
*/
static void xml_show_pgl(GthPGL *pgl, unsigned long pglnum,
                         unsigned long translationtable, GthInput *input,
                         unsigned int indentlevel, GthOutput *out)
{
  unsigned long agsnum = 0, serial, startpos, stoppos;
  char strand;

  /* opening tag */
  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "<predicted_gene_location>\n");
  indentlevel++;

  /* summary line */
  serial = pglnum + OUTPUTOFFSET;
  strand = SHOWSTRAND(gth_pgl_is_forward(pgl));
  startpos = SHOWGENPOS(gth_pgl_is_forward(pgl), gth_pgl_total_length(pgl),
                        gth_pgl_genomic_offset(pgl), pgl->maxrange.start);
  stoppos = SHOWGENPOS(gth_pgl_is_forward(pgl), gth_pgl_total_length(pgl),
                       gth_pgl_genomic_offset(pgl), pgl->maxrange.end);
  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp,
                  "<PGL_line PGL_serial=\"%lu\" PGL_strand=\"%c\" "
                  "PGL_start=\"%lu\" PGL_stop=\"%lu\"/>\n",
                  serial, strand, startpos, stoppos);

  /* nested alternative gene structures */
  while (agsnum < gth_pgl_num_of_ags(pgl)) {
    xml_show_ags(gth_pgl_get_ags(pgl, agsnum), pglnum, agsnum,
                 translationtable, input, indentlevel, out);
    agsnum++;
  }

  /* closing tag, back on the caller's indentation level */
  indentlevel--;
  gth_indent(out->outfp, indentlevel);
  gt_file_xprintf(out->outfp, "</predicted_gene_location>\n");
}
/*
  Report the progress of the chain computation through <showverbose>.
  With exactly one genomic and one reference file the short message form is
  used, otherwise the file counters are included. If <verboseseqs> is set, a
  second message with <genseqnum> and <refseqnum> follows.
*/
static void show_chain_calc_status(GthShowVerbose showverbose,
                                   GtUword chainnum,
                                   GtUword numofchains,
                                   GtUword numofmatches,
                                   GtUword currentgen_file_num,
                                   GtUword numofgenomicfiles,
                                   GtUword currentreffilenum,
                                   GtUword numofreffiles,
                                   bool directmatches,
                                   bool verboseseqs,
                                   GtUword genseqnum,
                                   GtUword refseqnum)
{
  char buf[SHOW_CHAIN_CALCULATION_STATUS_BUF_SIZE];
  GT_UNUSED int rval;

  gt_assert(numofchains > 0);

  if (numofgenomicfiles != 1 || numofreffiles != 1) {
    /* several files involved: show which ones are being processed */
    rval = snprintf(buf, sizeof buf,
                    "gf=" GT_WU "/" GT_WU ", d=%c, rf=" GT_WU "/" GT_WU
                    ", compute chains for bucket " GT_WU "/" GT_WU
                    " (matches in bucket=" GT_WU ")",
                    currentgen_file_num + 1, numofgenomicfiles,
                    SHOWSTRAND(directmatches),
                    currentreffilenum + 1, numofreffiles,
                    chainnum, numofchains, numofmatches);
  }
  else {
    rval = snprintf(buf, sizeof buf,
                    "d=%c, compute chains for bucket " GT_WU "/" GT_WU
                    " (matches in bucket=" GT_WU ")",
                    SHOWSTRAND(directmatches), chainnum, numofchains,
                    numofmatches);
  }
  /* the message is expected to fit into buf without truncation */
  gt_assert(rval < SHOW_CHAIN_CALCULATION_STATUS_BUF_SIZE);
  showverbose(buf);

  if (!verboseseqs)
    return;

  rval = snprintf(buf, sizeof buf,
                  "genseqnum=" GT_WU ", refseqnum=" GT_WU,
                  genseqnum, refseqnum);
  /* the message is expected to fit into buf without truncation */
  gt_assert(rval < SHOW_CHAIN_CALCULATION_STATUS_BUF_SIZE);
  showverbose(buf);
}
/*
  Report the progress of the spliced alignment computation through
  <showverbose>.

  The first message names the match direction, the strands of the genomic and
  the reference sequence and the chain counter (<chainctr> + 1 of
  <num_of_chains>). If more than one genomic or reference file is involved,
  the file counters are included as well. With <introncutout> set,
  ", icdelta=<icdelta>" is appended.

  If <verboseseqs> is set, a second message with <gen_id> and <ref_id>
  follows. Both IDs are caller-supplied strings of arbitrary length, so that
  message is truncated to the buffer size if necessary.
*/
static void show_matrix_calculation_status(GthShowVerbose showverbose,
                                           bool gen_strand_forward,
                                           bool ref_strand_forward,
                                           bool introncutout,
                                           GtUword chainctr,
                                           GtUword num_of_chains,
                                           GtUword icdelta,
                                           GtUword gen_file_num,
                                           GtUword num_of_gen_files,
                                           GtUword ref_file_num,
                                           GtUword num_of_ref_files,
                                           bool directmatches,
                                           bool verboseseqs,
                                           const char *gen_id,
                                           const char *ref_id)
{
  char buf[SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE],
       icdeltastring[ICDELTASTRINGLENGTH];
  GT_UNUSED int rval;

  /* icdeltastring is only filled (and only read below) with introncutout */
  if (introncutout) {
    rval = snprintf(icdeltastring, ICDELTASTRINGLENGTH,
                    ", icdelta=" GT_WU , icdelta);
    /* buffer icdeltastring[ICDELTASTRINGLENGTH] is large enough */
    gt_assert(rval < ICDELTASTRINGLENGTH);
  }

  if (num_of_gen_files == 1 && num_of_ref_files == 1) {
    rval = snprintf(buf, SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE,
                    "d=%c, compute spliced alignment, genseq=%c, "
                    "chain=" GT_WU "/" GT_WU ", refseq=%c%s",
                    SHOWSTRAND(directmatches),
                    SHOWSTRAND(gen_strand_forward),
                    chainctr + 1,
                    num_of_chains,
                    SHOWSTRAND(ref_strand_forward),
                    introncutout ? icdeltastring : "");
  }
  else {
    rval = snprintf(buf, SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE,
                    "gf=" GT_WU "/" GT_WU ", d=%c, rf=" GT_WU "/" GT_WU
                    ", compute spliced alignment, gs=%c, chain=" GT_WU "/"
                    GT_WU ", rs=%c%s",
                    gen_file_num + 1,
                    num_of_gen_files,
                    SHOWSTRAND(directmatches),
                    ref_file_num + 1,
                    num_of_ref_files,
                    SHOWSTRAND(gen_strand_forward),
                    chainctr + 1,
                    num_of_chains,
                    SHOWSTRAND(ref_strand_forward),
                    introncutout ? icdeltastring : "");
  }
  /* only numbers and single characters are formatted above, therefore
     buf[SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE] is large enough */
  gt_assert(rval < SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE);
  showverbose(buf);

  if (verboseseqs) {
    /* The IDs come from the caller and can be arbitrarily long, so "fits
       into buf" is not an invariant which may be asserted here: a long ID
       would abort debug builds. snprintf() truncates and NUL-terminates the
       message, which is acceptable for a status line. */
    (void) snprintf(buf, SHOW_MATRIX_CALCULATION_STATUS_BUF_SIZE,
                    "genomicid=%s, referenceid=%s", gen_id, ref_id);
    showverbose(buf);
  }
}
/*
  Process the global chain <chain> (built from <fragments>) and, if it passes
  the global chain filter, store it as a GthChain in the chain collection of
  the GthSaveChainInfo object passed via <data>.

  <data> must point to a GthSaveChainInfo. <max_gap_width> is only used for a
  consistency check in builds with assertions enabled, hence GT_UNUSED.

  For an accepted chain the second-sequence ranges of the fragments
  (startpos2..endpos2) are stored as forward ranges, overlapping consecutive
  ranges being merged. The ranges are then mirrored to the reverse strand and,
  if info->jump_table is set, jump tables are attached. A rejected chain is
  deleted again.
*/
void gth_save_chain(GtChain *chain, GtFragment *fragments,
                    unsigned long num_of_fragments,
                    GT_UNUSED unsigned long max_gap_width,
                    void *data)
{
  GthSaveChainInfo *info = (GthSaveChainInfo*) data;
  GtRange range;
  GthChain *gth_chain;
  unsigned long i, fragnum;

  gt_assert(chain_is_colinear(chain, fragments));

  if (info->comments) {
    gt_file_xprintf(info->outfp, "%c process global chain with score %ld\n",
                    COMMENTCHAR, gt_chain_get_score(chain));
    gt_file_xprintf(info->outfp, "%c process global chain with the "
                    "following fragments\n", COMMENTCHAR);
    for (i = 0; i < gt_chain_size(chain); i++)
      showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
  }

  /* init */
  gth_chain = gth_chain_new();
  gth_chain->gen_file_num = info->gen_file_num;
  gth_chain->gen_seq_num = info->gen_seq_num;
  gth_chain->ref_file_num = info->ref_file_num;
  gth_chain->ref_seq_num = info->ref_seq_num;

  /* chain has a minimum length of 1 */
  gt_assert(gt_chain_size(chain));

  /* global chain filter; the coverage is written to
     gth_chain->refseqcoverage */
  if (globalchainislongenough(chain, fragments,
                              &gth_chain->refseqcoverage, info->gcmincoverage,
                              info->referencelength, info->stat,
                              info->comments, info->outfp)) {
    /* save all potential exons */
    for (i = 0; i < gt_chain_size(chain); i++) {
      fragnum = gt_chain_get_fragnum(chain, i);
      range.start = fragments[fragnum].startpos2;
      range.end = fragments[fragnum].endpos2;

      /* check for overlap */
      if (i > 0 &&
          range.start <=
          ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end) {
        /* overlap found -> modify last range */
        gt_assert(((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                  ->end <= range.end);
        ((GtRange*) gt_array_get_last(gth_chain->forwardranges))->end =
          range.end;
      }
      else {
#ifndef NDEBUG
        if (i > 0) {
          /* gap width is smaller or equal than the maximum gap width */
          gt_assert((range.start - 1 -
                     ((GtRange*) gt_array_get_last(gth_chain->forwardranges))
                     ->end + 1 - 1) <= max_gap_width);
        }
#endif
        /* save range */
        gt_array_add(gth_chain->forwardranges, range);
      }
    }

    /* NOTE(review): the genomic range is taken before enrich_chain() is
       called; presumably on purpose -- confirm */
    GtRange genomicrange = chain_get_genomicrange(gth_chain);

    if (info->enrichchains) {
      enrich_chain(gth_chain, fragments, num_of_fragments, info->comments,
                   info->outfp);
    }

    gt_assert(gt_ranges_are_consecutive(gth_chain->forwardranges));

    /* copy ranges to opposite strand */
    gt_ranges_copy_to_opposite_strand(gth_chain->reverseranges,
                                      gth_chain->forwardranges,
                                      info->gen_total_length,
                                      info->gen_offset);

    /* compute jump table if necessary */
    if (info->jump_table) {
      GthJumpTable *forward_jump_table, *reverse_jump_table;
      GtArray *chain_fragments;
      chain_fragments = make_list_of_chain_fragments(chain, fragments,
                                                     num_of_fragments,
                                                     info->enrichchains,
                                                     &genomicrange);
      forward_jump_table =
        info->jump_table_new(gt_array_get_space(chain_fragments),
                             gt_array_size(chain_fragments), info->jtdebug);
      reverse_jump_table =
        info->jump_table_new_reverse(forward_jump_table,
                                     info->gen_total_length, info->gen_offset,
                                     info->ref_total_length, info->ref_offset);
      gt_assert(!gth_chain->forward_jump_table);
      gth_chain->forward_jump_table = forward_jump_table;
      gt_assert(!gth_chain->reverse_jump_table);
      gth_chain->reverse_jump_table = reverse_jump_table;
      gt_array_delete(chain_fragments);
      gth_chain->jump_table_delete = info->jump_table_delete;
    }

    /* save array of potential exons */
    gth_chain_collection_add(info->chain_collection, gth_chain);
    if (info->comments) {
      gt_file_xprintf(info->outfp, "%c global chain with the following "
                      "ranges has been saved\n",COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c forward ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->forwardranges, info->outfp);
      gt_file_xprintf(info->outfp, "%c reverse ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->reverseranges, info->outfp);
    }

    /* output stored chains here
       (Mohamed needed this to compare the chaining phase of gth with CHAINER)
     */
    if (info->stopafterchaining) {
      gt_file_xprintf(info->outfp,
                      "%c gl. chain with coverage=%.2f and score %ld "
                      "(genseq=%lu, str.=%c, refseq=%lu)\n",
                      COMMENTCHAR, gth_chain->refseqcoverage,
                      gt_chain_get_score(chain), gth_chain->gen_seq_num,
                      SHOWSTRAND(info->directmatches),
                      gth_chain->ref_seq_num);

      for (i = 0; i < gt_chain_size(chain); i++)
        showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
    }
  }
  else {
    /* for -paralogs this case is not supposed to occur */
    gt_assert(!info->paralogs);
    if (info->comments)
      gt_file_xprintf(info->outfp, "%c global chain discarded\n",
                      COMMENTCHAR);
    gth_chain_delete(gth_chain);
  }
}
/* Return the strand character SHOWSTRAND() yields for the reference strand
   flag (sa->ref_strand_forward) of <sa>, which must not be NULL. */
char gth_sa_ref_strand_char(const GthSA *sa)
{
  char strand_char;

  gt_assert(sa);
  strand_char = SHOWSTRAND(sa->ref_strand_forward);
  return strand_char;
}
/*
  Show the 3-phase translation and the ORFs of alternative gene structure
  <ags> (number <agsnum> of the PGL with number <pglnum>) on out->outfp.

  The exon ranges of <ags> are spliced out of the original genomic sequence,
  the spliced sequence is translated with translation table
  <translationtable>, each translated character being appended to the string
  of the frame reported by the translator. The result is handed to
  showtranslation() and gthshowORFs(). With out->xmlout set everything is
  wrapped in a <three_phase_translation> element.
*/
void gt_outputtranslationandorf(unsigned long pglnum, const GthAGS *ags,
                                unsigned long agsnum,
                                unsigned long translationtable,
                                GthInput *input,
                                unsigned int indentlevel,
                                GthOutput *out)
{
  GtFile *outfp = out->outfp;
  GtStr *frame[3];
  GtArray *exon_ranges;
  GthSplicedSeq *spliced_seq;
  const unsigned char *genomic_seq;
  GtCodonIterator *codon_iter;
  GtTransTable *trans_table;
  GtTranslator *translator;
  GtTranslatorStatus status;
  unsigned long exon_idx;
  unsigned int frame_idx, cur_frame;
  char residue;

  /* output header */
  if (!out->xmlout) {
    gt_file_xprintf(outfp, "3-phase translation of AGS-%lu (%cstrand):\n\n",
                    agsnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_ags_is_forward(ags)));
  }
  else {
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<three_phase_translation "
                    "xmlns=\"http://www.genomethreader.org/GTH_output/"
                    "PGL_module/predicted_gene_location/AGS_information/"
                    "three_phase_translation/\">\n");
    indentlevel++;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "<description PGL_serial=\"%lu\" "
                    "AGS_serial=\"%lu\" gDNA_strand=\"%c\"/>\n",
                    pglnum + OUTPUTOFFSET, agsnum + OUTPUTOFFSET,
                    SHOWSTRAND(gth_ags_is_forward(ags)));
  }

  /* collect the exon ranges of the AGS */
  exon_ranges = gt_array_new(sizeof (GtRange));
  for (exon_idx = 0; exon_idx < gt_array_size(ags->exons); exon_idx++) {
    GthExonAGS *exon = (GthExonAGS*) gt_array_get(ags->exons, exon_idx);
    gt_array_add(exon_ranges, exon->range);
  }

  /* splice the exons out of the genomic sequence */
  genomic_seq = gth_input_original_genomic_sequence(input,
                                                    gth_ags_filenum(ags),
                                                    gth_ags_is_forward(ags));
  spliced_seq = gth_spliced_seq_new(genomic_seq, exon_ranges);

  /* one string per reading frame */
  for (frame_idx = 0; frame_idx < 3; frame_idx++)
    frame[frame_idx] = gt_str_new();

  /* set up the translation machinery */
  codon_iter = gt_codon_iterator_simple_new((const char*)
                                            spliced_seq->splicedseq,
                                            spliced_seq->splicedseqlen, NULL);
  gt_assert(codon_iter);
  trans_table = gt_trans_table_new(translationtable, NULL);
  gt_assert(trans_table);
  translator = gt_translator_new_with_table(trans_table, codon_iter);

  /* translate the template in all three frames */
  for (status = gt_translator_next(translator, &residue, &cur_frame, NULL);
       status == GT_TRANSLATOR_OK;
       status = gt_translator_next(translator, &residue, &cur_frame, NULL)) {
    gt_str_append_char(frame[cur_frame], residue);
  }
  gt_assert(status != GT_TRANSLATOR_ERROR);

  gt_translator_delete(translator);
  gt_trans_table_delete(trans_table);
  gt_codon_iterator_delete(codon_iter);

  /* show the translation */
  showtranslation(spliced_seq, gt_str_get(frame[0]), gt_str_get(frame[1]),
                  gt_str_get(frame[2]), ags->exons, gth_ags_is_forward(ags),
                  gth_ags_total_length(ags), gth_ags_genomic_offset(ags),
                  indentlevel, out);

  /* show the (consolidated) ORFs */
  gthshowORFs(gt_str_get(frame[0]), gt_str_get(frame[1]),
              gt_str_get(frame[2]), gt_str_length(frame[0]),
              gt_str_length(frame[1]), gt_str_length(frame[2]),
              gth_ags_is_forward(ags), gth_ags_total_length(ags),
              gth_ags_genomic_offset(ags), gt_str_get(ags->gen_id), pglnum,
              agsnum, spliced_seq, indentlevel, out);

  if (out->xmlout) {
    /* close the element opened in the header */
    indentlevel--;
    gth_indent(outfp, indentlevel);
    gt_file_xprintf(outfp, "</three_phase_translation>\n");
  }

  /* free everything allocated above */
  gth_spliced_seq_delete(spliced_seq);
  gt_array_delete(exon_ranges);
  for (frame_idx = 0; frame_idx < 3; frame_idx++)
    gt_str_delete(frame[frame_idx]);
}