/* Binary search for offset in leftborder[start..end] (both bounds
   inclusive).  If a probed entry equals offset, its index is returned.
   Otherwise the index of the last probed entry that is larger than offset
   is returned, or end if no probed entry was larger. */
static GtUword gt_radixsort_findfirstlarger(const GtUword *leftborder,
                                            GtUword start,
                                            GtUword end,
                                            GtUword offset)
{
  const GtUword *const first = leftborder + start,
                *const last = leftborder + end;
  const GtUword *lo = first, *hi = last, *candidate = last;

  while (lo <= hi)
  {
    const GtUword *probe = lo + GT_DIV2(hi - lo);

    gt_assert(probe >= first && probe <= last);
    if (offset < *probe)
    {
      /* remember the larger entry and continue in the lower half */
      candidate = probe;
      hi = probe - 1;
      continue;
    }
    if (offset == *probe)
    {
      return (GtUword) (probe - leftborder);
    }
    lo = probe + 1;
  }
  gt_assert(candidate >= leftborder);
  return (GtUword) (candidate - leftborder);
}
/* Binary search over suftab[left..right] (inclusive) comparing the character
   at text position suftab[i] + offset with cc.  Returns the largest probed
   index whose character equals cc, or ULONG_MAX if no probed character
   equals cc.
   NOTE(review): readmode and totallength are not referenced directly here;
   presumably the SEQUENCE macro uses them by name - confirm. */
static GtUword lcpintervalfindlast(const GtEncseq *encseq,
                                   GtReadmode readmode,
                                   GtUword totallength,
                                   const ESASuffixptr *suftab,
                                   GtUchar cc,
                                   GtUword offset,
                                   GtUword left,
                                   GtUword right)
{
  GtUword lastmatch = ULONG_MAX;

  while (left <= right)
  {
    /* upper middle of the current interval */
    const GtUword probe = left + GT_DIV2(right - left + 1);
    const GtUword textpos = ESASUFFIXPTRGET(suftab,probe) + offset;
    const GtUchar probecc = SEQUENCE(encseq,textpos);

    if (cc >= probecc)
    {
      if (cc == probecc)
      {
        lastmatch = probe;
      }
      left = probe + 1;
    } else
    {
      if (probe == 0)
      {
        break; /* probe - 1 would wrap around */
      }
      right = probe - 1;
    }
  }
  return lastmatch;
}
/* Narrows the interval (left,right) by bisection until right <= left + 1.
   At every step the character at text position suftab[mid] + offset is
   compared with cc: a larger character moves right to mid, anything else
   moves left to mid.  Returns the final value of left.
   NOTE(review): readmode and totallength are not referenced directly here;
   presumably the SEQUENCE macro uses them by name - confirm. */
static unsigned long lcpintervalfindrightbound(const GtEncseq *encseq,
                                               GtReadmode readmode,
                                               unsigned long totallength,
                                               const ESASuffixptr *suftab,
                                               GtUchar cc,
                                               unsigned long offset,
                                               unsigned long left,
                                               unsigned long right)
{
  while (right > left+1)
  {
    const unsigned long probe = GT_DIV2(left+right);
    const unsigned long textpos = ESASUFFIXPTRGET(suftab,probe) + offset;
    const GtUchar probecc = SEQUENCE(encseq,textpos);

    if (probecc > cc)
    {
      right = probe;
    } else
    {
      left = probe;
    }
  }
  return left;
}
/* Returns the index, relative to sectionstart, of the last entry of the
   ascending table that is <= idx.

   The binary search only needs to find some entry <= idx; the trailing
   linear scan then advances to the last such entry.  The scan reads
   found[1], so the entry following the result must be readable and larger
   than idx for the scan to stop (presumably a sentinel or the next section
   start - confirm against the caller).

   The function asserts that an entry <= idx was seen.

   Fix: the early exit on an exact hit used to be nested inside
   "*midptr < idx" and therefore could never trigger; the outer comparison
   is now "<=" so that an exact hit ends the search immediately.  The
   returned index is unchanged because the forward scan normalises it. */
static GtUword gt_intset_16_binarysearch_sec_idx_largest_seq(
                                                      GtUword *sectionstart,
                                                      GtUword *secend,
                                                      GtUword idx)
{
  GtUword *midptr = NULL, *found = NULL,
          *startorig = sectionstart;

  if (*sectionstart <= idx)
  {
    found = sectionstart;
  }
  while (sectionstart < secend)
  {
    midptr = sectionstart + (GtUword) GT_DIV2(secend - sectionstart);
    if (*midptr <= idx)
    {
      found = midptr;
      if (*midptr == idx)
      {
        break; /* exact hit, only equal successors can follow */
      }
      sectionstart = midptr + 1;
    } else
    {
      secend = midptr - 1;
    }
  }
  gt_assert(found != NULL);
  /* move forward over all following entries that are still <= idx */
  while (found[1] <= idx)
  {
    found++;
  }
  return (GtUword) (found - startorig);
}
/* Binary search for extractkey in keytab.

   keytab holds numofkeys records of keysize + 1 bytes each; the key of
   record i starts at byte offset 1 + i * (keysize + 1) and is compared with
   strcmp, so every key must be NUL-terminated.  The keys have to be sorted
   in strcmp order.

   Returns the record index of the matching key, or numofkeys if the key
   does not occur.

   The search runs on the half-open interval [left, right).  Unlike the
   previous closed-interval version this never computes numofkeys - 1 or
   mid - 1, so an empty table and a key smaller than every entry both yield
   "not found" instead of wrapping around or tripping an assertion.  The
   sequence of probed records is the same as before. */
static unsigned long searchfastaqueryindes(const char *extractkey,
                                           const char *keytab,
                                           unsigned long numofkeys,
                                           unsigned long keysize)
{
  const unsigned long recordsize = keysize + 1UL;
  unsigned long left = 0, right = numofkeys;

  while (left < right)
  {
    /* lower middle of the closed interval [left, right - 1] */
    const unsigned long mid = left + (right - left - 1UL) / 2UL;
    const int cmp = strcmp(extractkey, keytab + 1UL + mid * recordsize);

    if (cmp == 0)
    {
      return mid;
    }
    if (cmp < 0)
    {
      right = mid;
    } else
    {
      left = mid + 1UL;
    }
  }
  return numofkeys;
}
/* Binary search in the table left..right (both inclusive), which must be
   sorted ascending by its idx member.  Returns a pointer to the entry whose
   idx equals key, or NULL if there is no such entry. */
static /*@null@*/ const Largecount *binsearchLargecount(GtUword key,
                                                        const Largecount *left,
                                                        const Largecount *right)
{
  const Largecount *lo = left, *hi = right;

  while (lo <= hi)
  {
    const Largecount *probe = lo + GT_DIV2((GtUword) (hi - lo));

    if (key < probe->idx)
    {
      hi = probe - 1;
    } else if (key > probe->idx)
    {
      lo = probe + 1;
    } else
    {
      return probe;
    }
  }
  return NULL;
}
/* Maps a rank to a position via the table leftptr..rightptr (both
   inclusive).  Every entry covers the ranks
   [rank, rank + (upperbound - lowerbound)); the entry covering the given
   rank is located by binary search and the result is
   lowerbound + (rank - entry rank).

   If no entry covers the rank, a message is written to stderr and the
   process exits with GT_EXIT_PROGRAMMING_ERROR.

   Fix: the diagnostic used to announce itself as "fromrank2rank", which is
   not the name of this function. */
GtUword gt_fromrank2pos(const Rankedbounds *leftptr,
                        const Rankedbounds *rightptr,
                        GtUword rank)
{
  const Rankedbounds *midptr;

  while (leftptr <= rightptr)
  {
    midptr = leftptr + GT_DIV2((GtUword) (rightptr-leftptr));
    if (rank < midptr->rank)
    {
      rightptr = midptr-1;
    } else if (rank >= midptr->rank +
                       (midptr->upperbound - midptr->lowerbound))
    {
      leftptr = midptr + 1;
    } else
    {
      return midptr->lowerbound + (rank - midptr->rank);
    }
  }
  fprintf(stderr,"fromrank2pos: cannot find rank "GT_WU""
                 " in ranges",rank);
  exit(GT_EXIT_PROGRAMMING_ERROR);
  /*@ignore@*/
  return 0;
  /*@end@*/
}
/* Maps a position to a rank via the table leftptr..rightptr (both
   inclusive).  The entry with lowerbound <= specialpos < upperbound is
   located by binary search and the result is
   entry rank + specialpos - lowerbound.

   If no entry contains the position, a message is written to stderr and the
   process exits with GT_EXIT_PROGRAMMING_ERROR. */
GtUword gt_frompos2rank(const Rankedbounds *leftptr,
                        const Rankedbounds *rightptr,
                        GtUword specialpos)
{
  const Rankedbounds *lo = leftptr, *hi = rightptr;

  while (lo <= hi)
  {
    const Rankedbounds *probe = lo + GT_DIV2((GtUword) (hi-lo));

    if (specialpos < probe->lowerbound)
    {
      hi = probe-1;
      continue;
    }
    if (specialpos >= probe->upperbound)
    {
      lo = probe + 1;
      continue;
    }
    return probe->rank + specialpos - probe->lowerbound;
  }
  fprintf(stderr,"frompos2rank: cannot find pos "GT_WU""
                 " in ranges",specialpos);
  exit(GT_EXIT_PROGRAMMING_ERROR);
  /*@ignore@*/
  return 0;
  /*@end@*/
}
/* Recursive select helper working on the node that occupies the bits
   [node_start, node_start + node_size) of we->c_bits and represents the
   symbol range [alpha_start, alpha_end].

   For a single-symbol range the function returns i - 1 if i <= node_size
   and ULONG_MAX otherwise.  For a wider range it descends into the child
   responsible for sym (right child iff sym is larger than the middle of the
   range), takes the value returned for the child and maps it back into this
   node with select_0/select_1, relative to node_start.  ULONG_MAX is
   returned if the child is empty or the child result is not smaller than
   the child size. */
static GtUword gt_wtree_encseq_select_rec(GtWtreeEncseq *we,
                                          GtUword i,
                                          GtWtreeSymbol sym,
                                          GtUword node_start,
                                          GtUword node_size,
                                          unsigned int alpha_start,
                                          unsigned int alpha_end)
{
  const unsigned int middle = GT_DIV2(alpha_start + alpha_end);
  int take_right;
  GtUword zeros_before = 0,
          ones_before = 0,
          zeros_in_node,
          child_start,
          child_size;

  if (alpha_start >= alpha_end)
  {
    /* single symbol left: convert count to position */
    if (i <= node_size)
    {
      return i - 1;
    }
    return ULONG_MAX;
  }

  take_right = middle < (unsigned int) sym ? 1 : 0;
  if (node_start != 0)
  {
    zeros_before = gt_compressed_bitsequence_rank_0(we->c_bits,
                                                    node_start - 1);
  }
  zeros_in_node =
    gt_compressed_bitsequence_rank_0(we->c_bits,
                                     node_start + node_size - 1) -
    zeros_before;

  if (take_right == 0)
  {
    alpha_end = middle;
    child_start = node_start + we->parent_instance.members->length;
    child_size = zeros_in_node;
  } else
  {
    if (node_start != 0)
    {
      ones_before = gt_compressed_bitsequence_rank_1(we->c_bits,
                                                     node_start - 1);
    }
    alpha_start = middle + 1;
    child_size =
      gt_compressed_bitsequence_rank_1(we->c_bits,
                                       node_start + node_size - 1) -
      ones_before;
    /* the right child follows the left child on the next level */
    child_start = node_start + we->parent_instance.members->length +
                  zeros_in_node;
  }

  if (child_size == 0)
  {
    return ULONG_MAX;
  }
  i = gt_wtree_encseq_select_rec(we, i, sym, child_start, child_size,
                                 alpha_start, alpha_end);
  if (i >= child_size)
  {
    return ULONG_MAX;
  }
  if (take_right == 0)
  {
    return gt_compressed_bitsequence_select_0(we->c_bits,
                                              zeros_before + i + 1) -
           node_start;
  }
  return gt_compressed_bitsequence_select_1(we->c_bits,
                                            ones_before + i + 1) -
         node_start;
}
/* Walks from the root towards the node responsible for sym, for at most
   level steps or until the symbol range consists of a single symbol.
   Along the way we->node_start and we->current_fo are updated; a missing
   child fill-offset record is created unless the child range is a single
   symbol.  Returns true iff sym lies in the lower half of the symbol range
   reached at the end of the walk. */
static bool gt_wtree_encseq_set_nodestart_and_current_fo(GtWtreeEncseq *we,
                                                         unsigned int level,
                                                         GtWtreeSymbol sym)
{
  unsigned int hi = we->alpha_size - 1,
               lo = 0,
               mid = GT_DIV2(hi),
               depth;

  gt_assert(sym <= (GtWtreeSymbol) hi);
  we->current_fo = we->root_fo;
  we->node_start = 0;

  for (depth = 0; depth < level && hi > lo; depth++)
  {
    if (sym <= (GtWtreeSymbol) mid)
    {
      /* descend to the left child */
      hi = mid;
      if (hi > lo && we->current_fo->left == NULL)
      {
        we->current_fo->left = gt_wtree_encseq_fill_offset_new();
      }
      we->node_start = we->node_start + we->parent_instance.members->length;
      we->current_fo = we->current_fo->left;
    } else
    {
      /* descend to the right child */
      lo = mid + 1;
      if (hi > lo && we->current_fo->right == NULL)
      {
        we->current_fo->right = gt_wtree_encseq_fill_offset_new();
      }
      /* start of right child: start of left + size of left */
      we->node_start = we->node_start +
                       we->parent_instance.members->length +
                       we->current_fo->left_size;
      we->current_fo = we->current_fo->right;
    }
    mid = GT_DIV2(lo + hi);
  }
  return (sym <= (GtWtreeSymbol) mid);
}
/* Bisection over condenseq->uniques (udb_nelems entries, compared by
   orig_startpos).  Returns the index of the last entry whose orig_startpos
   is <= position, or condenseq->udb_nelems if there is no such entry. */
GtUword gt_condenseq_uniques_position_binsearch(const GtCondenseq *condenseq,
                                                GtUword position)
{
  GtWord probe,
         below = (GtWord) -1, /* exclusive lower bound */
         above;               /* exclusive upper bound */

  gt_assert(condenseq && condenseq->udb_nelems > 0);
  gt_safe_assign(above, condenseq->udb_nelems);

  for (probe = GT_DIV2(below + above);
       above - below > (GtWord) 1;
       probe = GT_DIV2(below + above))
  {
    if (position < condenseq->uniques[probe].orig_startpos)
    {
      above = probe;
    } else
    {
      below = probe;
    }
  }
  if (below > (GtWord) -1 &&
      condenseq->uniques[probe].orig_startpos <= position)
  {
    return (GtUword) probe;
  }
  return condenseq->udb_nelems;
}
/* Recursive access helper working on the node that occupies the bits
   [node_start, node_start + node_size) of we->c_bits and represents the
   symbol range [alpha_start, alpha_end].  pos is a position inside the node
   (asserted to be < node_size).

   For a single-symbol range alpha_start is returned.  Otherwise the bit at
   node_start + pos selects the child (0: left, 1: right), pos is converted
   into the position inside that child and the function recurses. */
static GtWtreeSymbol gt_wtree_encseq_access_rec(GtWtreeEncseq *we,
                                                GtUword pos,
                                                GtUword node_start,
                                                GtUword node_size,
                                                unsigned int alpha_start,
                                                unsigned int alpha_end)
{
  const unsigned int middle = GT_DIV2(alpha_start + alpha_end);
  int bit;
  GtUword zeros_before = 0,
          ones_before = 0,
          zeros_in_node,
          child_pos,
          child_size;

  gt_assert(pos < node_size);
  if (alpha_start >= alpha_end)
  {
    return (GtWtreeSymbol) alpha_start;
  }

  bit = gt_compressed_bitsequence_access(we->c_bits, node_start + pos);
  if (node_start != 0)
  {
    zeros_before = gt_compressed_bitsequence_rank_0(we->c_bits,
                                                    node_start - 1);
  }
  zeros_in_node =
    gt_compressed_bitsequence_rank_0(we->c_bits,
                                     node_start + node_size - 1) -
    zeros_before;

  if (bit == 0)
  {
    /* convert count (rank) to position */
    child_pos = gt_compressed_bitsequence_rank_0(we->c_bits,
                                                 node_start + pos) -
                zeros_before - 1;
    return gt_wtree_encseq_access_rec(we, child_pos,
                                      node_start +
                                        we->parent_instance.members->length,
                                      zeros_in_node,
                                      alpha_start, middle);
  }

  if (node_start != 0)
  {
    ones_before = gt_compressed_bitsequence_rank_1(we->c_bits,
                                                   node_start - 1);
  }
  /* convert count (rank) to position */
  child_pos = gt_compressed_bitsequence_rank_1(we->c_bits,
                                               node_start + pos) -
              ones_before - 1;
  child_size =
    gt_compressed_bitsequence_rank_1(we->c_bits,
                                     node_start + node_size - 1) -
    ones_before;
  return gt_wtree_encseq_access_rec(we, child_pos,
                                    node_start +
                                      (we->parent_instance.members->length +
                                       zeros_in_node),
                                    child_size,
                                    middle + 1, alpha_end);
}
static bool gt_intset_16_binarysearch_is_member(const uint16_t *leftptr, const uint16_t *rightptr, uint16_t elem) { const uint16_t *midptr; while (leftptr <= rightptr) { midptr = leftptr + (GtUword) GT_DIV2(rightptr - leftptr); if (elem < *midptr) { rightptr = midptr - 1; } else { if (elem > *midptr) leftptr = midptr + 1; else return true; } } return false; }
static GtUword gt_intset_16_binarysearch_idx_sm_geq(const uint16_t *leftptr, const uint16_t *rightptr, uint16_t value) { const uint16_t *midptr = NULL, *leftorig = leftptr; gt_assert(leftptr <= rightptr); if (value <= *leftptr) return 0; if (value > *rightptr) return 1UL + (GtUword) (rightptr - leftptr); while (leftptr < rightptr) { midptr = leftptr + (GtUword) GT_DIV2(rightptr - leftptr); if (value <= *midptr) rightptr = midptr; else { leftptr = midptr + 1; } } return (GtUword) (leftptr - leftorig); }
/* Initialises the size-related members of fm from the given parameters:
   block and superblock sizes (as powers of two of log2bsize and twice
   log2bsize), the corresponding masks and counts, the marking distance,
   the size of the character map, the number of codes for the given suffix
   length and finally the index size as reported by determinefmindexsize. */
void gt_computefmkeyvalues (Fmindex *fm,
                            const GtSpecialcharinfo *specialcharinfo,
                            GtUword bwtlength,
                            unsigned int log2bsize,
                            unsigned int log2markdist,
                            unsigned int numofchars,
                            unsigned int suffixlength,
                            bool storeindexpos)
{
  /* values taken over directly from the arguments */
  fm->mappedptr = NULL;
  fm->bwtlength = bwtlength;
  fm->log2bsize = log2bsize;
  fm->log2markdist = log2markdist;
  fm->suffixlength = suffixlength;
  fm->mapsize = numofchars+1;

  /* blocks */
  fm->bsize = (unsigned int) GT_POW2 (fm->log2bsize);
  fm->bsizehalve = GT_DIV2(fm->bsize);
  fm->negatebsizeones = ~ (GtUword) (fm->bsize - 1);
  fm->nofblocks = (GtUword) (fm->bwtlength / fm->bsize) + 1;

  /* superblocks */
  fm->log2superbsize = GT_MULT2 (fm->log2bsize);
  fm->log2superbsizeminuslog2bsize = fm->log2superbsize - fm->log2bsize;
  fm->superbsize = (unsigned int) GT_POW2 (fm->log2superbsize);
  fm->negatesuperbsizeones = ~ (GtUword) (fm->superbsize - 1);
  fm->nofsuperblocks = (GtUword) (fm->bwtlength / fm->superbsize) + 2;

  /* marked positions */
  fm->markdist = (GtUword) GT_POW2 (fm->log2markdist);
  fm->markdistminus1 = (GtUword) (fm->markdist - 1);

  /* number of codes, only defined for a positive suffix length */
  fm->numofcodes = 0;
  if (fm->suffixlength > 0)
  {
    fm->numofcodes = gt_power_for_small_exponents(fm->mapsize-1,
                                                  fm->suffixlength);
  }

  /* must come last: reads the members set above */
  fm->sizeofindex = determinefmindexsize (fm,
                                          specialcharinfo,
                                          suffixlength,
                                          storeindexpos);
}
/* Binary search for extractkey among the fastakey members of the table
   fastakeyqueries (numofqueries entries, sorted in strcmp order).  When the
   key occurs several times the search keeps moving left, so the index of
   the first occurrence is returned.  Returns numofqueries if the key does
   not occur. */
static unsigned long searchdesinfastakeyqueries(const char *extractkey,
                                                const Fastakeyquery
                                                  *fastakeyqueries,
                                                unsigned long numofqueries)
{
  const Fastakeyquery *lo = fastakeyqueries,
                      *hi = fastakeyqueries + numofqueries - 1;

  while (lo <= hi)
  {
    const Fastakeyquery *probe = lo + GT_DIV2((unsigned long) (hi-lo));
    const int cmp = strcmp(extractkey,probe->fastakey);

    if (cmp > 0)
    {
      lo = probe + 1;
      continue;
    }
    /* go left if the key is smaller or if the predecessor matches too */
    if (cmp < 0 ||
        (probe > fastakeyqueries &&
         strcmp(extractkey,(probe-1)->fastakey) == 0))
    {
      hi = probe - 1;
      continue;
    }
    return (unsigned long) (probe - fastakeyqueries);
  }
  return numofqueries;
}
/* Tool runner: depending on arguments->test a bit table of reads (bits,
   nofreads) is either parsed from a list file, computed by pairwise
   comparison, or computed from an enhanced suffix array; on success the
   table is printed with gt_cntlist_show.  Returns 0 on success and a
   non-zero value on error. */
static int gt_readjoiner_cnttest_runner(GT_UNUSED int argc,
    GT_UNUSED const char **argv, GT_UNUSED int parsed_args,
    void *tool_arguments, GT_UNUSED GtError *err)
{
  GtReadjoinerCnttestArguments *arguments = tool_arguments;
  GtEncseqLoader *el = NULL;
  GtEncseq *reads = NULL;
  GtBitsequence *bits = NULL;
  GtUword nofreads;
  int had_err = 0;

  gt_error_check(err);
  gt_assert(arguments);
  if (arguments->test == GT_READJOINER_CNTTEST_SHOWLIST)
  {
    /* read the table from the file <readset><suffix> */
    GtStr *fn = NULL;
    fn = gt_str_clone(arguments->readset);
    gt_str_append_cstr(fn, GT_READJOINER_SUFFIX_CNTLIST);
    had_err = gt_cntlist_parse(gt_str_get(fn), true, &bits, &nofreads, err);
    gt_str_delete(fn);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_BRUTEFORCE ||
      arguments->test == GT_READJOINER_CNTTEST_KMP)
  {
    /* load the reads (mirrored unless singlestrand) and compare pairwise;
       the KMP variant is selected by the fifth argument */
    el = gt_encseq_loader_new();
    gt_encseq_loader_drop_description_support(el);
    gt_encseq_loader_disable_autosupport(el);
    if (!arguments->singlestrand)
      gt_encseq_loader_mirror(el);
    reads = gt_encseq_loader_load(el, gt_str_get(arguments->readset), err);
    if (reads == NULL)
      had_err = -1;
    else
    {
      gt_rdj_pairwise_exact(GT_OVLFIND_CNT, reads, !arguments->singlestrand,
          false, arguments->test == GT_READJOINER_CNTTEST_KMP, 1UL, true,
          NULL, NULL, false, NULL, &bits, &nofreads);
    }
    gt_encseq_delete(reads);
    gt_encseq_loader_delete(el);
  }
  else if (arguments->test == GT_READJOINER_CNTTEST_ESA)
  {
    Sequentialsuffixarrayreader *ssar = NULL;
    GtUword readlength = 0, firstrevcompl = 0;
    GtLogger *verbose_logger = gt_logger_new(arguments->verbose,
        GT_LOGGER_DEFLT_PREFIX, stderr);
    ssar = gt_newSequentialsuffixarrayreaderfromfile(gt_str_get(
          arguments->readset), SARR_LCPTAB | SARR_SUFTAB | SARR_SSPTAB,
        true, verbose_logger, err);
    if (gt_error_is_set(err))
      had_err = -1;
    else
    {
      nofreads = gt_encseq_num_of_sequences(ssar->encseq);
      if (!arguments->singlestrand)
      {
        /* only half of the sequences are counted as reads in this mode;
           firstrevcompl is the index of the first sequence of the other
           half */
        nofreads = GT_DIV2(nofreads);
        firstrevcompl = nofreads;
      }
      GT_INITBITTAB(bits, nofreads);
      /* readlength stays 0 unless both strands are used and the access
         type is GT_ACCESS_TYPE_EQUALLENGTH */
      if (!arguments->singlestrand)
      if (gt_encseq_accesstype_get(ssar->encseq) == GT_ACCESS_TYPE_EQUALLENGTH)
        readlength = gt_encseq_seqlength(ssar->encseq, 0);
      (void)gt_contfind_bottomup(ssar, false, bits, arguments->singlestrand
          ? 0 : firstrevcompl, readlength);
    }
    if (ssar != NULL)
      gt_freeSequentialsuffixarrayreader(&ssar);
    gt_logger_delete(verbose_logger);
  }
  else
  {
    /* unknown test mode */
    gt_assert(false);
  }
  /* bits and nofreads are only read here if no error occurred above */
  if (!had_err)
    had_err = gt_cntlist_show(bits, nofreads, NULL, false, err);
  gt_free(bits);
  return had_err;
}
/* Narrows the suffix-table interval [lcpitv->left, lcpitv->right] in two
   passes of bisection: the first pass may move lcpitv->left, the second
   pass sets lcpitv->right.  Returns false if a comparison at an interval
   end already rules the interval out, true otherwise.

   NOTE(review): GT_MMSEARCH_COMPARE is defined elsewhere.  From its use
   here it presumably compares the query substring with the suffix given as
   first argument, starting at the offset given as second argument, stores
   the result in retcode and advances lcplen; it presumably also refers to
   the locals esr, sidx, totallength, currentdbchar, currentquerychar and
   the parameters querysubstring, minmatchlength, readmode and dbencseq by
   name - confirm before renaming anything in this function. */
static bool gt_mmsearch(const GtEncseq *dbencseq,
                        GtEncseqReader *esr,
                        const ESASuffixptr *suftab,
                        GtReadmode readmode,
                        Lcpinterval *lcpitv,
                        const GtQuerysubstring *querysubstring,
                        GtUword minmatchlength)
{
  GtUword left, leftsave, mid, right, lpref, rpref,
          totallength, lcplen, sidx;
  int retcode = 0;
  GtUchar currentdbchar, currentquerychar;

  totallength = gt_encseq_total_length(dbencseq);
  leftsave = left = lcpitv->left;
  right = lcpitv->right;
  /* first pass: determine the new left boundary */
  lcplen = lcpitv->offset;
  GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,left),lcplen);
  if (retcode > 0)
  {
    lpref = lcplen;
    lcplen = lcpitv->offset;
    GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,right),lcplen);
    if (retcode > 0)
    {
      return false;
    } else
    {
      rpref = lcplen;
      while (right > left + 1)
      {
        mid = GT_DIV2(left+right);
        /* start the comparison at the smaller of the two lengths recorded
           for the interval ends */
        lcplen = MIN(lpref,rpref);
        GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,mid),lcplen);
        if (retcode <= 0)
        {
          right = mid;
          rpref = lcplen;
        } else
        {
          left = mid;
          lpref = lcplen;
        }
      }
      lcpitv->left = right;
    }
  }

  /* second pass: determine the new right boundary, starting again from the
     original left boundary */
  left = leftsave;
  right = lcpitv->right;
  lcplen = lcpitv->offset;
  GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,left),lcplen);
  if (retcode < 0)
  {
    return false;
  } else
  {
    lpref = lcplen;
    lcplen = lcpitv->offset;
    GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,right),lcplen);
    if (retcode >= 0)
    {
      lcpitv->right = right;
    } else
    {
      rpref = lcplen;
      while (right > left + 1)
      {
        mid = GT_DIV2(left+right);
        lcplen = MIN(lpref,rpref);
        GT_MMSEARCH_COMPARE(ESASUFFIXPTRGET(suftab,mid),lcplen);
        if (retcode >= 0)
        {
          left = mid;
          lpref = lcplen;
        } else
        {
          right = mid;
          rpref = lcplen;
        }
      }
      lcpitv->right = left;
    }
  }
  return true;
}
/* Scans a DNA encseq in forward direction and returns a newly allocated
   array with one entry per sequence.  For every sequence the characters
   encoded like a/A/t/T and those encoded like c/C/g/G are counted.  If
   calculate is true, calculate_gc() fills the entry for the sequence from
   both counts; otherwise the entry is the raw G/C count.  For a mirrored
   encseq the array has twice the halved number of sequences as entries and
   the upper half is overwritten with the lower half in reverse order.
   The caller owns the returned array. */
double *gt_encseq_get_gc(const GtEncseq *encseq,
                         bool with_special,
                         bool calculate,
                         GT_UNUSED GtError *err)
{
  GtEncseqReader *reader;
  GtAlphabet *alphabet;
  double *gc_content;
  /* unit = file or sequence depending on per_file */
  unsigned long pos,
                totallength,
                num_units,
                seq_idx = 0,
                seq_end = 0,
                at_count = 0,
                gc_count = 0,
                default_count = 0;
  bool mirrored;
  GtUchar acgt[8], code;

  alphabet = gt_encseq_alphabet(encseq);
  gt_assert(gt_alphabet_is_dna(alphabet));
  /* codes 0..3 of acgt stand for A/T, codes 4..7 for C/G */
  gt_alphabet_encode_seq(alphabet, acgt, "aAtTcCgG", 8UL);
  totallength = gt_encseq_total_length(encseq);
  reader = gt_encseq_create_reader_with_readmode(encseq,
                                                 GT_READMODE_FORWARD,
                                                 0);
  mirrored = gt_encseq_is_mirrored(encseq);
  num_units = gt_encseq_num_of_sequences(encseq);
  if (mirrored)
  {
    num_units = GT_DIV2(num_units);
    gc_content = gt_calloc((size_t) GT_MULT2(num_units), sizeof (double));
  } else
  {
    gc_content = gt_calloc((size_t) num_units, sizeof (double));
  }

  seq_end = gt_encseq_seqstartpos(encseq, seq_idx) +
            gt_encseq_seqlength(encseq, seq_idx);

  for (pos = 0; pos < totallength; pos++)
  {
    unsigned int k;

    if (pos == seq_end)
    {
      /* separator reached: store the result and start the next sequence */
      if (calculate)
      {
        calculate_gc(encseq, gc_content, with_special, seq_idx,
                     gc_count, at_count);
      } else
      {
        gc_content[seq_idx] = (double) gc_count;
      }
      seq_idx++;
      seq_end = gt_encseq_seqstartpos(encseq, seq_idx) +
                gt_encseq_seqlength(encseq, seq_idx);
      gt_encseq_reader_reinit_with_readmode(reader, encseq,
                                            GT_READMODE_FORWARD,
                                            pos + 1UL);
      gc_count = at_count = default_count = 0UL;
      continue;
    }

    code = gt_encseq_reader_next_encoded_char(reader);
    /* find the first entry of acgt matching the current character */
    for (k = 0; k < 8U && code != acgt[k]; k++)
    {
      /* nothing */
    }
    if (k < 4U)
    {
      at_count++;
    } else if (k < 8U)
    {
      gc_count++;
    } else
    {
      default_count++;
    }
  }

  /* the last sequence is not followed by a separator */
  if (calculate)
  {
    calculate_gc(encseq, gc_content, with_special, seq_idx,
                 gc_count, at_count);
  } else
  {
    gc_content[seq_idx] = (double) gc_count;
  }
  gt_encseq_reader_delete(reader);

  if (mirrored)
  {
    unsigned long src, dest = GT_MULT2(num_units);

    for (src = 0; src < num_units; src++)
    {
      dest--;
      gc_content[dest] = gc_content[src];
    }
  }
  return gc_content;
}
/* Unit test for the 16 bit integer set.  A strictly increasing random
   array is generated, inserted element by element into a new set and then
   read back via the get and smallest-geq accessors (including their _test
   variants); finally the gaps between consecutive elements are checked with
   the generic not-in-set and seqnum helpers.  Returns 0 on success and a
   non-zero value if a check failed.

   Fix: the verification loop used to read arr[idx - 1] for idx == 0, i.e.
   one element in front of the allocated array.  The first element has no
   predecessor, so the predecessor comparison is now skipped for idx == 0
   and the smallest-geq check for arr[0] - 1 is always performed. */
int gt_intset_16_unit_test(GtError *err)
{
  int had_err = 0;
  GtIntset *is;
  GtUword num_of_elems = gt_rand_max(((GtUword) 1) << 10) + 1,
          *arr = gt_malloc(sizeof (*arr) * num_of_elems),
          /* NOTE(review): '/' binds tighter than '<<', so this evaluates as
             num_of_elems << (4 / num_of_elems); left unchanged because it
             only affects the random spacing - confirm the intent */
          stepsize = GT_DIV2(num_of_elems <<4 / num_of_elems),
          idx;
  size_t is_size;

  gt_error_check(err);

  /* strictly increasing values, the first one is at least 1 */
  arr[0] = gt_rand_max(stepsize) + 1;
  for (idx = (GtUword) 1; idx < num_of_elems; ++idx) {
    arr[idx] = arr[idx - 1] + gt_rand_max(stepsize) + 1;
  }

  is_size = gt_intset_16_size_of_rep(arr[num_of_elems - 1], num_of_elems);

  if (!had_err) {
    if (is_size < (size_t) UINT_MAX) {
      is = gt_intset_16_new(arr[num_of_elems - 1], num_of_elems);
      for (idx = 0; idx < num_of_elems; idx++) {
        gt_intset_16_add(is, arr[idx]);
        gt_ensure(idx + 1 == gt_intset_16_size(is));
        /* while the set is incomplete, nothing larger than the element
           just added can be found */
        if (idx < num_of_elems - 1)
          gt_ensure(gt_intset_16_get_idx_smallest_geq(is,
                                                      arr[idx] + 1) ==
                    num_of_elems);
      }

      gt_ensure(gt_intset_16_elems_is_valid(is));
      gt_ensure(gt_intset_16_secstart_is_valid(is));

      for (idx = 0; !had_err && idx < num_of_elems; idx++) {
        /* arr[idx] - 1 is only a non-member if it differs from the
           predecessor; the first element has no predecessor */
        if (arr[idx] != 0 &&
            (idx == 0 || arr[idx - 1] != (arr[idx] - 1))) {
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] - 1) ==
            idx);
          gt_ensure(
            gt_intset_16_get_idx_smallest_geq(is, arr[idx] - 1) ==
            idx);
        }
        gt_ensure(gt_intset_16_get_test(is, idx) == arr[idx]);
        gt_ensure(gt_intset_16_get(is, idx) == arr[idx]);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq_test(is, arr[idx] + 1) ==
          idx + 1);
        gt_ensure(
          gt_intset_16_get_idx_smallest_geq(is, arr[idx] + 1) ==
          idx + 1);
      }

      /* values below the first element */
      if (!had_err)
        had_err = gt_intset_unit_test_notinset(is, 0, arr[0] - 1, err);
      if (!had_err)
        had_err = gt_intset_unit_test_check_seqnum(is, 0, arr[0] - 1, 0,
                                                   err);

      /* values strictly between consecutive elements */
      for (idx = (GtUword) 1; !had_err && idx < num_of_elems; idx++) {
        had_err = gt_intset_unit_test_notinset(is, arr[idx - 1] + 1,
                                               arr[idx] - 1, err);
        if (!had_err)
          had_err = gt_intset_unit_test_check_seqnum(is, arr[idx - 1] + 1,
                                                     arr[idx] - 1, idx,
                                                     err);
      }
      gt_intset_delete(is);
    }
  }
  gt_free(arr);
  return had_err;
}
/* calculating alignment in linear space */ GtUword gt_calc_linearalign(LinspaceManagement *spacemanager, GtScoreHandler *scorehandler, GtAlignment *align, const GtUchar *useq, GtUword ustart, GtUword ulen, const GtUchar *vseq, GtUword vstart, GtUword vlen) { GtUword distance, gapcost, *Ctab, *EDtabcolumn, *Rtabcolumn, threadcount = 1; gt_assert(scorehandler); gt_linspaceManagement_set_ulen(spacemanager,ulen); gapcost = gt_scorehandler_get_gapscore(scorehandler); if (ulen == 0UL) { return construct_trivial_insertion_alignment(align,vlen,gapcost); } else if (vlen == 0UL) { return construct_trivial_deletion_alignment(align,ulen,gapcost); } else if (vlen == 1UL) { gt_linspaceManagement_check(spacemanager,(ulen+1)*(vlen+1)-1,ulen, sizeof (*EDtabcolumn), sizeof (EDtabcolumn), 0); return alignment_in_square_space_generic(spacemanager, align, useq, ustart, ulen, vseq, vstart, vlen, scorehandler); } else if (gt_linspaceManagement_checksquare(spacemanager, ulen, vlen, sizeof (*EDtabcolumn), sizeof (*Rtabcolumn))) { /* call 2dim */ return alignment_in_square_space_generic(spacemanager, align, useq, ustart, ulen, vseq, vstart, vlen, scorehandler); } #ifdef GT_THREADS_ENABLED gt_linspaceManagement_check(spacemanager, ulen + GT_DIV2(vlen), vlen, sizeof (*EDtabcolumn), sizeof (*Rtabcolumn), sizeof (*Ctab)); #else gt_linspaceManagement_check(spacemanager, ulen, vlen, sizeof (*EDtabcolumn), sizeof (*Rtabcolumn), sizeof (*Ctab)); #endif Ctab = gt_linspaceManagement_get_crosspointTabspace(spacemanager); Ctab[vlen] = ulen; distance = evaluatelinearcrosspoints(spacemanager, scorehandler, useq, ustart, ulen, vseq, vstart, vlen, Ctab, 0, 0, &threadcount); determineCtab0(Ctab, scorehandler, vseq[vstart], useq, ustart); reconstructalignment_from_Ctab(align, Ctab, useq, ustart, vseq, vstart, vlen, scorehandler); return distance; }
/* Evaluate crosspoints in a recursive way.

   For vlen >= 2 the middle column midcol = vlen / 2 is evaluated, the row
   read from Rtabcolumn[ulen] is stored (shifted by rowoffset) in
   Ctab[midcol], and the function recurses on the upper left and the bottom
   right subproblem.  Returns the value delivered by
   evaluateallEDtabRtabcolumns for this call, rowoffset if ulen is 0, and 0
   if vlen < 2.

   Without GT_THREADS_ENABLED, subproblems accepted by
   gt_linspaceManagement_checksquare are delegated to ctab_in_square_space.
   With GT_THREADS_ENABLED the two subproblems are run in two new threads as
   long as *threadcount + 2 does not exceed gt_jobs; otherwise they are
   processed sequentially in the calling thread. */
static GtUword evaluatelinearcrosspoints(LinspaceManagement *spacemanager,
                                         GtScoreHandler *scorehandler,
                                         const GtUchar *useq,
                                         GtUword ustart,
                                         GtUword ulen,
                                         const GtUchar *vseq,
                                         GtUword vstart,
                                         GtUword vlen,
                                         GtUword *Ctab,
                                         GtUword rowoffset,
                                         GT_UNUSED GtUword threadidx,
                                         GT_UNUSED GtUword *threadcount)
{
  GtUword midrow, midcol, distance, *EDtabcolumn = NULL, *Rtabcolumn = NULL;
#ifdef GT_THREADS_ENABLED
  GtThread *t1 = NULL, *t2 = NULL;
  GtLinearCrosspointthreadinfo threadinfo1, threadinfo2;
#endif

  if (vlen >= 2UL)
  {
    if (ulen == 0)
    {
      /* nothing left of useq: every crosspoint is in row rowoffset */
      GtUword i;
      for (i = 0; i <= vlen; i++)
        Ctab[i] = rowoffset;
      return rowoffset;
    }
#ifndef GT_THREADS_ENABLED
    if (gt_linspaceManagement_checksquare(spacemanager, ulen,vlen,
                                          sizeof (GtUword),
                                          sizeof (Rtabcolumn)))
    { /* product of subsequences is lower than space allocated already or
       * lower than timesquarefactor * ulen */
      return ctab_in_square_space(spacemanager, scorehandler, Ctab, useq,
                                  ustart, ulen, vseq, vstart, vlen,
                                  rowoffset);
    }
#endif

    midcol = GT_DIV2(vlen);
    Rtabcolumn = gt_linspaceManagement_get_rTabspace(spacemanager);
    EDtabcolumn = gt_linspaceManagement_get_valueTabspace(spacemanager);
    /* each call works on its own part of the shared column tables,
       selected by rowoffset and threadidx */
    Rtabcolumn = Rtabcolumn + rowoffset + threadidx;
    EDtabcolumn = EDtabcolumn + rowoffset + threadidx;

    distance = evaluateallEDtabRtabcolumns(EDtabcolumn, Rtabcolumn,
                                           scorehandler, midcol,
                                           useq, ustart, ulen,
                                           vseq, vstart, vlen);
    midrow = Rtabcolumn[ulen];
    Ctab[midcol] = rowoffset + midrow;

#ifdef GT_THREADS_ENABLED
    if (*threadcount + 2 > gt_jobs)
    {
#endif
      /* upper left corner */
      (void) evaluatelinearcrosspoints(spacemanager, scorehandler,
                                       useq, ustart, midrow,
                                       vseq, vstart, midcol,
                                       Ctab, rowoffset,
                                       threadidx, threadcount);

      /* bottom right corner */
      (void) evaluatelinearcrosspoints(spacemanager, scorehandler,
                                       useq, ustart + midrow, ulen - midrow,
                                       vseq, vstart + midcol, vlen - midcol,
                                       Ctab + midcol, rowoffset + midrow,
                                       threadidx, threadcount);
#ifdef GT_THREADS_ENABLED
    }
    else
    {
      /* one thread per subproblem; the second one gets a threadidx shifted
         by midcol / 2 */
      threadinfo1 = set_LinearCrosspointthreadinfo(spacemanager, scorehandler,
                                                   useq, ustart, midrow,
                                                   vseq, vstart, midcol,
                                                   Ctab, rowoffset,
                                                   threadidx, threadcount);
      (*threadcount)++;
      t1 = gt_thread_new(evaluatelinearcrosspoints_thread_caller,
                         &threadinfo1, NULL);

      threadinfo2 = set_LinearCrosspointthreadinfo(spacemanager, scorehandler,
                                                   useq, ustart + midrow,
                                                   ulen - midrow,
                                                   vseq, vstart + midcol,
                                                   vlen - midcol,
                                                   Ctab + midcol,
                                                   rowoffset + midrow,
                                                   threadidx + GT_DIV2(midcol),
                                                   threadcount);
      (*threadcount)++;
      t2 = gt_thread_new(evaluatelinearcrosspoints_thread_caller,
                         &threadinfo2, NULL);

      /* wait for both subproblems before releasing the threads */
      gt_thread_join(t1);
      (*threadcount)--;
      gt_thread_join(t2);
      (*threadcount)--;
      gt_thread_delete(t1);
      gt_thread_delete(t2);
    }
#endif
    return distance;
  }
  return 0;
}
GtScoreHandler *gt_scorehandler2costhandler(const GtScoreHandler *scorehandler) { GtScoreHandler *costhandler; gt_assert(scorehandler != NULL); if (scorehandler->scorematrix == NULL) { GtWord matchscore, mismatchscore, gap_extension, gap_opening, maxscore = MAX(MAX(GT_DIV2(scorehandler->matchscore+1), GT_DIV2(scorehandler->mismatchscore+1)), MAX(1 + scorehandler->gap_extension,0)); matchscore = 2 * maxscore - scorehandler->matchscore; mismatchscore = 2 * maxscore - scorehandler->mismatchscore; gap_extension = maxscore - scorehandler->gap_extension; gap_opening = -scorehandler->gap_opening; costhandler = gt_scorehandler_new(matchscore, mismatchscore, gap_opening, gap_extension); if (!scorehandler->mappedsequence) { gt_scorehandler_plain(costhandler); } } else { int maxscore; GtWord gap_extension, gap_opening; unsigned int i, j, dim = gt_score_matrix_get_dimension(scorehandler->scorematrix); GtScoreMatrix *costmatrix = gt_score_matrix_clone_empty(scorehandler->scorematrix); for (maxscore = 0, i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { int val = gt_score_matrix_get_score(scorehandler->scorematrix, i, j); if (val > maxscore) { maxscore = val; } } } maxscore = MAX(GT_DIV2(maxscore+1), 1 + scorehandler->gap_extension); for (i = 0; i < dim; i++) { for (j = 0; j < dim; j++) { /* translate */ int score = gt_score_matrix_get_score(scorehandler->scorematrix,i,j); gt_score_matrix_set_score(costmatrix, i, j, 2 * maxscore - score); } } gap_extension = maxscore - scorehandler->gap_extension; gap_opening = -scorehandler->gap_opening; costhandler = gt_scorehandler_new( 0,0, gap_opening, gap_extension); gt_scorehandler_add_scorematrix(costhandler,costmatrix); } return costhandler; }
/* Recursive rank helper working on the node that occupies the bits
   [node_start, node_start + node_size) of we->c_bits and represents the
   symbol range [alpha_start, alpha_end].  pos is a position inside the node
   (asserted to be < node_size).

   For a single-symbol range pos + 1 is returned.  Otherwise the child
   responsible for sym is chosen (right child iff sym is larger than the
   middle of the range) and the number of matching bits up to and including
   pos is determined.  If that number or the child size is 0 the result is
   0; otherwise the function recurses into the child with the number
   converted to a position. */
static GtUword gt_wtree_encseq_rank_rec(GtWtreeEncseq *we,
                                        GtUword pos,
                                        GtWtreeSymbol sym,
                                        GtUword node_start,
                                        GtUword node_size,
                                        unsigned int alpha_start,
                                        unsigned int alpha_end)
{
  const unsigned int middle = GT_DIV2(alpha_start + alpha_end);
  int bit;
  GtUword zeros_before = 0,
          ones_before = 0,
          zeros_in_node,
          child_start,
          child_size,
          count;

  gt_log_log("alphabet: %u-%u-%u, sym: " GT_WU,
             alpha_start, middle, alpha_end, (GtUword) sym);
  gt_log_log("pos: "GT_WU"", pos);
  gt_assert(pos < node_size);

  if (alpha_start >= alpha_end)
  {
    gt_log_log("found: rank="GT_WU"", pos + 1);
    return pos + 1; /* convert position to count */
  }

  bit = middle < (unsigned int) sym ? 1 : 0;
  if (node_start != 0)
  {
    zeros_before = gt_compressed_bitsequence_rank_0(we->c_bits,
                                                    node_start - 1);
  }
  zeros_in_node =
    gt_compressed_bitsequence_rank_0(we->c_bits,
                                     node_start + node_size - 1) -
    zeros_before;

  if (bit == 0)
  {
    count = gt_compressed_bitsequence_rank_0(we->c_bits,
                                             node_start + pos) -
            zeros_before;
    alpha_end = middle;
    child_start = node_start + we->parent_instance.members->length;
    child_size = zeros_in_node;
  } else
  {
    if (node_start != 0)
    {
      ones_before = gt_compressed_bitsequence_rank_1(we->c_bits,
                                                     node_start - 1);
    }
    count = gt_compressed_bitsequence_rank_1(we->c_bits,
                                             node_start + pos) -
            ones_before;
    alpha_start = middle + 1;
    child_size =
      gt_compressed_bitsequence_rank_1(we->c_bits,
                                       node_start + node_size - 1) -
      ones_before;
    /* the right child follows the left child on the next level */
    child_start = node_start +
                  (we->parent_instance.members->length + zeros_in_node);
  }
  gt_log_log("bit: %d, nodesize: "GT_WU"", bit, child_size);

  if (child_size == 0 || count == 0)
  {
    return 0;
  }
  return gt_wtree_encseq_rank_rec(we, count - 1, sym,
                                  child_start, child_size,
                                  alpha_start, alpha_end);
}