/* Return the length of sequence <seq_num> stored in <seq_con>. The length is
   obtained by applying gt_range_length() to the range reported by
   gth_seq_con_get_range() for that sequence. <seq_con> must not be NULL. */
unsigned long gth_seq_con_get_length(GthSeqCon *seq_con, unsigned long seq_num)
{
  GtRange seq_range;

  gt_assert(seq_con);

  seq_range = gth_seq_con_get_range(seq_con, seq_num);

  return gt_range_length(&seq_range);
}
/* Mirror the reference sequence position of every match in <matches> onto the
   opposite strand of its reference sequence. The matches are modified in
   place. Proper chaining requires this step, but only if palindromic matches
   have been calculated. The range of each reference sequence is looked up in
   <ref_seq_con>. */
static void transform_refseq_positions(GtArray *matches,
                                       GthSeqCon *ref_seq_con)
{
  const GtUword num_matches = gt_array_size(matches);
  GtUword idx, seq_length, seq_offset;
  GtRange seq_range, original, transformed;
  GthMatch *m;

  for (idx = 0; idx < num_matches; idx++) {
    m = gt_array_get(matches, idx);

    /* length and offset of the reference sequence this match belongs to */
    seq_range = gth_seq_con_get_range(ref_seq_con, m->Storeseqnumreference);
    seq_length = seq_range.end - seq_range.start + 1;
    seq_offset = seq_range.start;

    /* the match range as it is currently stored (inclusive borders) */
    original.start = m->Storepositionreference;
    original.end = original.start + m->Storelengthreference - 1;
    gt_assert(original.end > original.start);

    /* reflect both borders within the reference sequence; the old end becomes
       the new start and vice versa */
    transformed.start = seq_length - 1 - (original.end - seq_offset)
                        + seq_offset;
    transformed.end = seq_length - 1 - (original.start - seq_offset)
                      + seq_offset;
    gt_assert(transformed.end > transformed.start);

    /* write the reflected range back into the match */
    m->Storepositionreference = transformed.start;
    m->Storelengthreference = transformed.end - transformed.start + 1;
  }
}
/* Return the range of sequence <seq_num> in <seq_con> shifted so that it
   starts at position 0: the result has start 0 and end equal to the distance
   between the end and the start of the range reported by
   gth_seq_con_get_range(). <seq_con> must not be NULL and <seq_num> must be
   smaller than the number of sequences in <seq_con>. */
GtRange gth_seq_con_get_relative_range(GthSeqCon *seq_con,
                                       unsigned long seq_num)
{
  GtRange absolute, relative;

  gt_assert(seq_con);
  gt_assert(seq_num < gth_seq_con_num_of_seqs(seq_con));

  absolute = gth_seq_con_get_range(seq_con, seq_num);

  relative.start = 0;
  relative.end = absolute.end - absolute.start;

  return relative;
}
static void calc_chains_from_matches(GthChainCollection *chain_collection, GtArray *matches, GthChainingInfo *chaining_info, GthSeqCon *gen_seq_con, GthSeqCon *ref_seq_con, GtUword rare, double fragweightfactor, GthJumpTableNew jump_table_new, GthJumpTableNewReverse jump_table_new_reverse, GthJumpTableDelete jump_table_delete) { GtUword i, numofchains = 0, num_of_fragments, maxbucketlength = 0; GtRange range; GtFile *outfp = chaining_info->call_info->out->outfp; GtFragment *fragments; GthSaveChainInfo info; GtArray *buckets; Bucket *bucket; /* this is a random sample to check that no equal matches exist either one match to chain or if more than one the first two differ */ gt_assert(gt_array_size(matches) == 1 || (gt_array_size(matches) > 1 && !gth_matches_are_equal(gt_array_get(matches, 0), gt_array_get(matches, 1)))); /* init */ buckets = gt_array_new(sizeof (Bucket)); /* output unsorted matches */ if (chaining_info->call_info->out->comments) { gt_file_xprintf(outfp, "%c output unsorted matches\n", COMMENTCHAR); showmatches(gt_array_get_space(matches), gt_array_size(matches), outfp); } /* transform reference sequence positions to opposite strand if necessary */ if (!chaining_info->directmatches) { if (chaining_info->call_info->out->comments) { gt_file_xprintf(outfp, "%c\n", COMMENTCHAR); gt_file_xprintf(outfp, "%c transform reference sequence positions to " "opposite strand\n", COMMENTCHAR); gt_file_xprintf(outfp, "%c\n", COMMENTCHAR); } transform_refseq_positions(matches, ref_seq_con); /* output transformed matches */ if (chaining_info->call_info->out->comments) { gt_file_xprintf(outfp, "%c output transformed matches\n", COMMENTCHAR); showmatches(gt_array_get_space(matches), gt_array_size(matches), outfp); } } /* sort matches */ sort_matches_and_calc_buckets(matches, buckets, &maxbucketlength); /* output sorted matches */ if (chaining_info->call_info->out->comments) { gt_file_xprintf(outfp, "%c output sorted matches\n", COMMENTCHAR); 
showmatches(gt_array_get_space(matches), gt_array_size(matches), outfp); } /* output buckets */ if (chaining_info->call_info->out->comments) { gt_file_xprintf(outfp, "%c output buckets\n", COMMENTCHAR); outputbuckets(buckets, gt_array_get_space(matches), outfp); } /* alloc space for fragments */ fragments = gt_malloc(sizeof (GtFragment) * maxbucketlength); /* save data to process the chains with saveChainasDPrange; constant part */ info.chain_collection = chain_collection; info.gcmincoverage = chaining_info->call_info->gcmincoverage; info.stat = chaining_info->stat; info.comments = chaining_info->call_info->out->comments; info.stopafterchaining = chaining_info->call_info->simfilterparam .stopafterchaining; info.paralogs = chaining_info->call_info->simfilterparam.paralogs; info.enrichchains = chaining_info->call_info->simfilterparam .enrichchains; info.jump_table = chaining_info->call_info->simfilterparam.jump_table; info.jump_table_new = jump_table_new; info.jump_table_new_reverse = jump_table_new_reverse; info.jump_table_delete = jump_table_delete; info.jtdebug = chaining_info->jtdebug; info.directmatches = chaining_info->directmatches; info.outfp = outfp; info.gen_file_num = chaining_info->gen_file_num; info.ref_file_num = chaining_info->ref_file_num; /* for every bucket a chain and for every chain a DP call (later maybe more than one chain) */ for (i = 0; i < gt_array_size(buckets); i++) { bucket = gt_array_get(buckets, i); if (chaining_info->call_info->out->showverbose) { if (chaining_info->refseqisindex && !chaining_info->call_info->simfilterparam.online) { /* in this case the exact number of chains is known */ numofchains = gt_array_size(buckets); } else { /* this expression gives an upper bound on the number of chains (because we do not know the exact number here) */ numofchains = chaining_info->bucketnum + gth_seq_con_num_of_seqs(gen_seq_con) * (gth_seq_con_num_of_seqs(ref_seq_con) - bucket->seqnum1); if (numofchains > chaining_info->maxbucketnum) 
numofchains = chaining_info->maxbucketnum; else chaining_info->maxbucketnum = numofchains; } } /* compute a set of fragments from every bucket of matches */ gthinitfragments(fragments, &num_of_fragments, (GthMatch*) gt_array_get_space(matches) + bucket->startpos, bucket->length, rare, fragweightfactor); if (chaining_info->call_info->out->showverbose) { show_chain_calc_status (chaining_info->call_info->out->showverbose, ++chaining_info->bucketnum, numofchains, num_of_fragments, chaining_info->gen_file_num, gth_input_num_of_gen_files(chaining_info->input), chaining_info->ref_file_num, gth_input_num_of_ref_files(chaining_info->input), chaining_info->directmatches, chaining_info->call_info->out->verboseseqs, bucket->seqnum2, bucket->seqnum1); } info.gen_seq_num = ((GthMatch*) gt_array_get(matches, bucket->startpos)) ->Storeseqnumgenomic; info.ref_seq_num = ((GthMatch*) gt_array_get(matches, bucket->startpos)) ->Storeseqnumreference; /* store genomic offset */ range = gth_seq_con_get_range(gen_seq_con, info.gen_seq_num); info.gen_total_length = range.end - range.start + 1; info.gen_offset = range.start; /* store length of reference sequence */ range = gth_seq_con_get_range(ref_seq_con, info.ref_seq_num); info.ref_total_length = range.end - range.start + 1; info.ref_offset = range.start; info.referencelength = range.end - range.start + 1; /* set number of remaining buckets */ info.numofremainingbuckets = gt_array_size(buckets) - i; if (chaining_info->call_info->simfilterparam.paralogs) { gt_globalchaining_coverage(fragments, num_of_fragments, chaining_info->call_info->gcmaxgapwidth, info.referencelength, ((double) chaining_info->call_info->gcmincoverage) / 100.0, gth_save_chain, &info); } else { gt_globalchaining_max(fragments, num_of_fragments, chaining_info->call_info->gcmaxgapwidth, gth_save_chain, &info); } } /* free space */ gt_array_delete(buckets); gt_free(fragments); }