/*
  Extract encoded characters belonging to the unique element <id> of <cs>
  into <buffer>, beginning at original position <frompos>.

  The number of characters extracted is the smaller of
    - what remains of the unique element from <frompos> on, and
    - the requested range length (<topos> - <frompos> + 1).

  <frompos> must not lie before the element's original start position and
  must fall inside the element (both are asserted).

  Returns the number of characters passed to gt_encseq_extract_encoded().
*/
static GtUword condenseq_unique_extract_encoded(const GtCondenseq *cs,
                                                GtUword id,
                                                GtUchar *buffer,
                                                GtUword frompos,
                                                GtUword topos)
{
  const GtCondenseqUnique *uq = &cs->uniques[id];
  GtUword offset_in_unique,
          first,
          last,
          available,
          wanted,
          extracted;

  gt_assert(uq->orig_startpos <= frompos);
  offset_in_unique = frompos - uq->orig_startpos;
  gt_assert(offset_in_unique < uq->len);

  /* translate the offset into a position within the unique encseq */
  first = gt_encseq_seqstartpos(cs->unique_es, id) + offset_in_unique;

  available = uq->len - offset_in_unique;
  wanted = topos - frompos + 1;
  extracted = (available < wanted) ? available : wanted;

  last = first + extracted - 1;
  gt_encseq_extract_encoded(cs->unique_es, buffer, first, last);
  return extracted;
}
/*
  Write the encoded characters of sequence number <idx> in <bs>, restricted
  to the sequence-relative range <start>..<end>, into <out>.

  <bs> must not be NULL, <idx> must be smaller than the number of sequences
  in the underlying encoded sequence, and <end> must not be smaller than
  <start> (all asserted). Both range bounds are forwarded to
  gt_encseq_extract_encoded() after adding the sequence's start position.
*/
void gt_bioseq_get_encoded_sequence_range(const GtBioseq *bs, GtUchar *out,
                                          GtUword idx, GtUword start,
                                          GtUword end)
{
  GtUword seq_offset, abs_first, abs_last;

  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq) && end >= start);

  /* shift the sequence-relative bounds to positions within the encseq */
  seq_offset = gt_encseq_seqstartpos(bs->encseq, idx);
  abs_first = seq_offset + start;
  abs_last = seq_offset + end;

  gt_encseq_extract_encoded(bs->encseq, out, abs_first, abs_last);
}
/*
  Write the complete encoded sequence number <idx> of <bs> into <out>.

  <bs> must not be NULL and <idx> must be smaller than the number of
  sequences in the underlying encoded sequence (both asserted). The extracted
  range runs from the sequence's start position up to start position plus
  sequence length minus one.

  NOTE(review): a sequence of length 0 would make the end position wrap
  below the start position -- confirm whether empty sequences can occur.
*/
void gt_bioseq_get_encoded_sequence(const GtBioseq *bs, GtUchar *out,
                                    GtUword idx)
{
  GtUword first, length, last;

  gt_assert(bs);
  gt_assert(idx < gt_encseq_num_of_sequences(bs->encseq));

  first = gt_encseq_seqstartpos(bs->encseq, idx);
  length = gt_encseq_seqlength(bs->encseq, idx);
  last = first + length - 1;

  gt_encseq_extract_encoded(bs->encseq, out, first, last);
}
/*
  Lua binding: extract the encoded characters in the range given by Lua
  arguments 2 (start) and 3 (end) from the encoded sequence passed as
  argument 1.

  Raises a Lua argument error if start > end or if end is not smaller than
  the total length of the encoded sequence. On success, a freshly allocated
  buffer holding the extracted characters is handed to
  encseq_lua_push_buffer() and 1 is returned (one Lua result).

  NOTE(review): the buffer is not freed in this function; presumably
  encseq_lua_push_buffer() takes ownership of it -- confirm.
*/
static int encseq_lua_extract_encoded(lua_State *L)
{
  GtEncseq **encseq_ud;
  GtUword from, to, nof_chars;
  unsigned char *encoded;

  /* fetch and validate the arguments in stack order */
  encseq_ud = check_encseq(L, 1);
  from = luaL_checknumber(L, 2);
  to = luaL_checknumber(L, 3);
  luaL_argcheck(L, from <= to, 2, "must be <= range endposition");
  luaL_argcheck(L, to < gt_encseq_total_length(*encseq_ud), 3,
                "cannot exceed total length of encoded sequence");

  nof_chars = to - from + 1;
  encoded = gt_malloc(nof_chars * sizeof *encoded);
  gt_encseq_extract_encoded(*encseq_ud, encoded, from, to);
  encseq_lua_push_buffer(L, encoded, nof_chars);
  return 1;
}
/*
  Extract a substring starting at a pseudo-random position of <encseq> into
  <seqspace>.

  The start position is drawn with random() modulo the total length of
  <encseq>. At most <substringlength> characters are extracted; if fewer
  characters remain between the start position and the end of <encseq>, the
  length is reduced accordingly. <seqspace> must provide room for
  <substringlength> characters.

  <encseq> must have a total length > 0 and <substringlength> must be > 0
  (both asserted).

  Returns the number of characters actually extracted.
*/
static GtUword samplesubstring(GtUchar *seqspace,
                               const GtEncseq *encseq,
                               GtUword substringlength)
{
  GtUword start, totallength, remaining;

  totallength = gt_encseq_total_length(encseq);
  /* the modulo below would be a division by zero for an empty encseq */
  gt_assert(totallength > 0);
  start = (GtUword) (random() % totallength);

  /* start < totallength, hence the subtraction cannot wrap around;
     comparing against the remainder (rather than computing
     start + substringlength) also avoids an unsigned overflow for very
     large requested lengths */
  remaining = totallength - start;
  if (substringlength > remaining)
  {
    substringlength = remaining;
  }
  gt_assert(substringlength > 0);
  gt_encseq_extract_encoded(encseq, seqspace, start,
                            start + substringlength - 1);
  return substringlength;
}
static void gt_show_seed_extend_plain(GtSequencepairbuffer *seqpairbuf, GtLinspaceManagement *linspace_spacemanager, GtScoreHandler *linspace_scorehandler, GtAlignment *alignment, GtUchar *alignment_show_buffer, GtUword alignmentwidth, bool showeoplist, const GtUchar *characters, GtUchar wildcardshow, const GtEncseq *aencseq, const GtEncseq *bencseq, const GtQuerymatch *querymatchptr) { GtUword edist; GtReadmode query_readmode = gt_querymatch_query_readmode(querymatchptr); const GtUword distance = gt_querymatch_distance(querymatchptr), dblen = gt_querymatch_dblen(querymatchptr), queryseqnum = gt_querymatch_queryseqnum(querymatchptr), querystart_fwdstrand = gt_querymatch_querystart_fwdstrand(querymatchptr), querylen = gt_querymatch_querylen(querymatchptr); const GtUword apos_ab = gt_querymatch_dbstart(querymatchptr); const GtUword bpos_ab = gt_encseq_seqstartpos(bencseq, queryseqnum) + querystart_fwdstrand; gt_querymatch_coordinates_out(querymatchptr); if (dblen >= seqpairbuf->a_allocated) { seqpairbuf->a_sequence = gt_realloc(seqpairbuf->a_sequence, sizeof *seqpairbuf->a_sequence * dblen); seqpairbuf->a_allocated = dblen; } if (querylen >= seqpairbuf->b_allocated) { seqpairbuf->b_sequence = gt_realloc(seqpairbuf->b_sequence, sizeof *seqpairbuf->b_sequence * querylen); seqpairbuf->b_allocated = querylen; } gt_encseq_extract_encoded(aencseq, seqpairbuf->a_sequence, apos_ab, apos_ab + dblen - 1); gt_encseq_extract_encoded(bencseq, seqpairbuf->b_sequence, bpos_ab, bpos_ab + querylen - 1); if (query_readmode != GT_READMODE_FORWARD) { gt_assert(query_readmode == GT_READMODE_REVCOMPL); gt_inplace_reverse_complement(seqpairbuf->b_sequence,querylen); } edist = gt_computelinearspace_generic(linspace_spacemanager, linspace_scorehandler, alignment, seqpairbuf->a_sequence, 0, dblen, seqpairbuf->b_sequence, 0, querylen); if (edist < distance) { printf("# edist=" GT_WU " (smaller by " GT_WU ")\n",edist,distance - edist); } gt_assert(edist <= distance); if (alignmentwidth > 0) { 
gt_alignment_show_generic(alignment_show_buffer, false, alignment, stdout, alignmentwidth, characters, wildcardshow); } if (showeoplist && distance > 0) { gt_alignment_show_multieop_list(alignment, stdout); } if (alignmentwidth > 0 || showeoplist) { gt_alignment_reset(alignment); } }