/*
 * Returns true if all of the following hold for <chain>, false otherwise:
 *  - none of its genomic/reference file and sequence numbers equals
 *    GT_UNDEF_ULONG,
 *  - its forward ranges are consecutive,
 *  - copying its reverse ranges to the opposite strand (with
 *    <gen_total_length> and <gen_offset>) reproduces its forward ranges.
 */
static bool chain_is_filled_and_consistent(GthChain *chain,
                                           unsigned long gen_total_length,
                                           unsigned long gen_offset)
{
  GtArray *mirrored;
  bool strands_agree;

  /* check if file and sequence numbers are defined */
  if (chain->gen_file_num == GT_UNDEF_ULONG ||
      chain->gen_seq_num == GT_UNDEF_ULONG ||
      chain->ref_file_num == GT_UNDEF_ULONG ||
      chain->ref_seq_num == GT_UNDEF_ULONG) {
    return false;
  }

  if (!gt_ranges_are_consecutive(chain->forwardranges)) {
    return false;
  }

  /* map the reverse ranges onto the other strand and compare the outcome
     with the stored forward ranges; the scratch array is released on the
     single path below */
  mirrored = gt_array_new(sizeof (GtRange));
  gt_ranges_copy_to_opposite_strand(mirrored, chain->reverseranges,
                                    gen_total_length, gen_offset);
  strands_agree = gt_ranges_are_equal(mirrored, chain->forwardranges);
  gt_array_delete(mirrored);

  return strands_agree;
}
/*
 * Returns true if the exon ranges obtained from <sa> via gth_sa_get_exons()
 * are consecutive according to gt_ranges_are_consecutive(), false otherwise.
 * <sa> must not be NULL.
 */
bool gth_sa_exons_are_forward_and_consecutive(const GthSA *sa)
{
  GtArray *exons;
  bool consecutive;

  gt_assert(sa);

  /* collect the exons in a temporary array which is freed before returning */
  exons = gt_array_new(sizeof (GtRange));
  gth_sa_get_exons(sa, exons);
  consecutive = gt_ranges_are_consecutive(exons);
  gt_array_delete(exons);

  return consecutive;
}
/*
 * Processes the global chain <chain> and, if it passes the global chain
 * filter, stores it as a GthChain.
 *
 * <chain>            the global chain; must contain at least one fragment
 * <fragments>        fragment array indexed by the fragment numbers of <chain>
 * <num_of_fragments> number of elements in <fragments>
 * <max_gap_width>    only consulted by a debug assertion on the gap between
 *                    two neighbouring saved ranges
 * <data>             must point to a GthSaveChainInfo
 *
 * If globalchainislongenough() accepts the chain, the startpos2/endpos2
 * intervals of its fragments are saved as forward ranges (overlapping
 * neighbours are merged into one range), the chain is optionally enriched,
 * the ranges are copied to the opposite strand, jump tables are computed if
 * requested, and the new GthChain is handed to gth_chain_collection_add().
 * Otherwise the new GthChain is deleted again.
 * Diagnostic output goes to info->outfp, depending on info->comments and
 * info->stopafterchaining.
 */
void gth_save_chain(GtChain *chain, GtFragment *fragments,
                    unsigned long num_of_fragments,
                    GT_UNUSED unsigned long max_gap_width,
                    void *data)
{
  GthSaveChainInfo *info = (GthSaveChainInfo*) data;
  GtRange range;
  GthChain *gth_chain;
  unsigned long i, fragnum;

  gt_assert(chain_is_colinear(chain, fragments));

  if (info->comments) {
    gt_file_xprintf(info->outfp, "%c process global chain with score %ld\n",
                    COMMENTCHAR, gt_chain_get_score(chain));
    gt_file_xprintf(info->outfp, "%c process global chain with the "
                    "following fragments\n", COMMENTCHAR);
    for (i = 0; i < gt_chain_size(chain); i++)
      showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
  }

  /* init */
  gth_chain = gth_chain_new();
  gth_chain->gen_file_num = info->gen_file_num;
  gth_chain->gen_seq_num = info->gen_seq_num;
  gth_chain->ref_file_num = info->ref_file_num;
  gth_chain->ref_seq_num = info->ref_seq_num;

  /* chain has a minimum length of 1 */
  gt_assert(gt_chain_size(chain));

  /* global chain filter; it receives the address of the coverage field
     (presumably to fill it in -- the value is printed further below) */
  if (globalchainislongenough(chain, fragments, &gth_chain->refseqcoverage,
                              info->gcmincoverage, info->referencelength,
                              info->stat, info->comments, info->outfp)) {
    /* save all potential exons */
    for (i = 0; i < gt_chain_size(chain); i++) {
      fragnum = gt_chain_get_fragnum(chain, i);
      range.start = fragments[fragnum].startpos2;
      range.end = fragments[fragnum].endpos2;

      if (i > 0) {
        /* the range saved most recently; only exists from the second
           fragment on */
        GtRange *last = (GtRange*) gt_array_get_last(gth_chain->forwardranges);

        /* check for overlap */
        if (range.start <= last->end) {
          /* overlap found -> modify last range */
          gt_assert(last->end <= range.end);
          last->end = range.end;
          continue;
        }
#ifndef NDEBUG
        /* gap width is smaller or equal than the maximum gap width */
        gt_assert((range.start - 1 - last->end + 1 - 1) <= max_gap_width);
#endif
      }

      /* save range */
      gt_array_add(gth_chain->forwardranges, range);
    }

    /* determined before the optional enrichment below, used for the jump
       table fragments */
    GtRange genomicrange = chain_get_genomicrange(gth_chain);

    if (info->enrichchains) {
      enrich_chain(gth_chain, fragments, num_of_fragments, info->comments,
                   info->outfp);
    }

    gt_assert(gt_ranges_are_consecutive(gth_chain->forwardranges));

    /* copy ranges to opposite strand */
    gt_ranges_copy_to_opposite_strand(gth_chain->reverseranges,
                                      gth_chain->forwardranges,
                                      info->gen_total_length,
                                      info->gen_offset);

    /* compute jump table if necessary */
    if (info->jump_table) {
      GthJumpTable *forward_jump_table, *reverse_jump_table;
      GtArray *chain_fragments;
      chain_fragments = make_list_of_chain_fragments(chain, fragments,
                                                     num_of_fragments,
                                                     info->enrichchains,
                                                     &genomicrange);
      forward_jump_table =
        info->jump_table_new(gt_array_get_space(chain_fragments),
                             gt_array_size(chain_fragments), info->jtdebug);
      reverse_jump_table =
        info->jump_table_new_reverse(forward_jump_table,
                                     info->gen_total_length, info->gen_offset,
                                     info->ref_total_length, info->ref_offset);
      gt_assert(!gth_chain->forward_jump_table);
      gth_chain->forward_jump_table = forward_jump_table;
      gt_assert(!gth_chain->reverse_jump_table);
      gth_chain->reverse_jump_table = reverse_jump_table;
      gt_array_delete(chain_fragments);
      gth_chain->jump_table_delete = info->jump_table_delete;
    }

    /* save array of potential exons */
    gth_chain_collection_add(info->chain_collection, gth_chain);
    if (info->comments) {
      gt_file_xprintf(info->outfp, "%c global chain with the following "
                      "ranges has been saved\n",COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c forward ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->forwardranges, info->outfp);
      gt_file_xprintf(info->outfp, "%c reverse ranges:\n", COMMENTCHAR);
      gt_file_xprintf(info->outfp, "%c ", COMMENTCHAR);
      gt_ranges_show(gth_chain->reverseranges, info->outfp);
    }

    /* output stored chains here (Mohamed needed this to compare the chaining
       phase of gth with CHAINER) */
    if (info->stopafterchaining) {
      gt_file_xprintf(info->outfp,
                      "%c gl. chain with coverage=%.2f and score %ld "
                      "(genseq=%lu, str.=%c, refseq=%lu)\n", COMMENTCHAR,
                      gth_chain->refseqcoverage, gt_chain_get_score(chain),
                      gth_chain->gen_seq_num, SHOWSTRAND(info->directmatches),
                      gth_chain->ref_seq_num);
      for (i = 0; i < gt_chain_size(chain); i++)
        showfragment(fragments + gt_chain_get_fragnum(chain, i), info->outfp);
    }
  }
  else {
    /* for -paralogs this case is not supposed to occur */
    gt_assert(!info->paralogs);
    if (info->comments)
      gt_file_xprintf(info->outfp, "%c global chain discarded\n", COMMENTCHAR);
    gth_chain_delete(gth_chain);
  }
}
/* XXX: change this function: add more sophisticated extension strategy */ void gth_chain_extend_borders(GthChain *chain, const GtRange *gen_seq_bounds, const GtRange *gen_seq_bounds_rc, GT_UNUSED unsigned long gen_total_length, GT_UNUSED unsigned long gen_offset) { long tmpborder; /* at least one range in chain */ gt_assert(gt_array_size(chain->forwardranges)); /* forward range borders are in considered genomic region */ gt_assert(gt_ranges_borders_are_in_region(chain->forwardranges, gen_seq_bounds)); /* reverse range borders are in considered genomic region */ gt_assert(gt_ranges_borders_are_in_region(chain->reverseranges, gen_seq_bounds_rc)); /* chain->forwardranges is forward and consecutive */ gt_assert(gt_ranges_are_consecutive(chain->forwardranges)); /* valid sequence bounds */ gt_assert(gen_seq_bounds->start <= gen_seq_bounds->end); gt_assert(gen_seq_bounds_rc->start <= gen_seq_bounds_rc->end); /* set start border, forward strand */ tmpborder = gt_safe_cast2long(((GtRange*) gt_array_get_first(chain->forwardranges)) ->start); tmpborder -= DPEXTENSION; if (tmpborder < gt_safe_cast2long(gen_seq_bounds->start)) tmpborder = gen_seq_bounds->start; ((GtRange*) gt_array_get_first(chain->forwardranges))->start = gt_safe_cast2ulong(tmpborder); /* set start border, reverse complement strand */ tmpborder = gt_safe_cast2long(((GtRange*) gt_array_get_first(chain->reverseranges)) ->start); tmpborder -= DPEXTENSION; if (tmpborder < gt_safe_cast2long(gen_seq_bounds_rc->start)) tmpborder = gen_seq_bounds_rc->start; ((GtRange*) gt_array_get_first(chain->reverseranges))->start = gt_safe_cast2ulong(tmpborder); /* set end border, forward strand */ tmpborder = gt_safe_cast2long(((GtRange*) gt_array_get_last(chain->forwardranges)) ->end); tmpborder += DPEXTENSION; if (tmpborder > gt_safe_cast2long(gen_seq_bounds->end)) tmpborder = gen_seq_bounds->end; ((GtRange*) gt_array_get_last(chain->forwardranges))->end = gt_safe_cast2ulong(tmpborder); /* set end border, reverse complement 
strand */ tmpborder = gt_safe_cast2long(((GtRange*) gt_array_get_last(chain->reverseranges)) ->end); tmpborder += DPEXTENSION; if (tmpborder > gt_safe_cast2long(gen_seq_bounds_rc->end)) tmpborder = gen_seq_bounds_rc->end; ((GtRange*) gt_array_get_last(chain->reverseranges))->end = gt_safe_cast2ulong(tmpborder); gt_assert(chain_is_filled_and_consistent(chain, gen_total_length, gen_offset)); }