/*
  Tracing variant of the trim predicate. The aligned length of a front entry
  is computed as 2 * row + diagonal. The entry is trimmed (return value true)
  if either
    1. the aligned length is at least minlenforhistorycheck and
       matchhistory_count is smaller than minmatchnum, or
    2. the aligned length is smaller than minlenfrommaxdiff.
  In every case exactly one line is written to stdout which starts with
  diagonal, distance and row (separated by '&'); for a trimmed entry the
  line additionally names the rule (1 or 2) that triggered.
*/
static bool trimthisentry(GtUword distance,
                          Rowvaluetype row,
                          GtWord diagonal,
                          GtUword minlenforhistorycheck,
                          Matchcounttype matchhistory_count,
                          GtUword minmatchnum,
                          GtUword minlenfrommaxdiff)
{
  const GtUword alignedlen = GT_MULT2(row) + diagonal;
  bool trim = true;

  if (alignedlen >= minlenforhistorycheck &&
      matchhistory_count < minmatchnum) {
    /* rule 1: not enough matches in the history */
    printf(GT_WD "&" GT_WU "&%u&1: matches=%d < " GT_WU "=minmatches\n",
           diagonal,distance,row,(int) matchhistory_count,minmatchnum);
  } else if (alignedlen < minlenfrommaxdiff) {
    /* rule 2: entry is too short compared to the given lower bound */
    printf(GT_WD "&" GT_WU "&%u&2: i'+j'=" GT_WU "<" GT_WU "=i+j-lag\n",
           diagonal,distance,row,alignedlen,minlenfrommaxdiff);
  } else {
    printf(GT_WD "&" GT_WU "&%u\n", diagonal,distance,row);
    trim = false;
  }
  return trim;
}
Pckbuckettable *gt_pckbuckettable_new(const FMindex *fmindex, unsigned int numofchars, unsigned long totallength, unsigned int maxdepth) { GtArrayPckbck_Boundsatdepth stack; Pckbck_Boundsatdepth parent, child; unsigned long rangesize, idx, *rangeOccs; Pckbuckettable *pckbt; Mbtab *tmpmbtab; GT_INITARRAY(&stack,Pckbck_Boundsatdepth); child.lowerbound = 0; child.upperbound = totallength+1; child.depth = 0; child.code = (GtCodetype) 0; GT_STOREINARRAY(&stack,Pckbck_Boundsatdepth,128,child); rangeOccs = gt_malloc(sizeof (*rangeOccs) * GT_MULT2(numofchars)); tmpmbtab = gt_malloc(sizeof (*tmpmbtab) * numofchars); pckbt = pckbuckettable_allocandinittable(numofchars,maxdepth,true); while (stack.nextfreePckbck_Boundsatdepth > 0) { parent = stack.spacePckbck_Boundsatdepth[--stack.nextfreePckbck_Boundsatdepth]; gt_assert(parent.lowerbound < parent.upperbound); rangesize = gt_bwtrangesplitallwithoutspecial(tmpmbtab, rangeOccs, fmindex, parent.lowerbound, parent.upperbound); gt_assert(rangesize <= (unsigned long) numofchars); for (idx = 0; idx < rangesize; idx++) { child.lowerbound = tmpmbtab[idx].lowerbound; child.upperbound = tmpmbtab[idx].upperbound; child.depth = parent.depth + 1; gt_assert(child.depth <= maxdepth); child.code = parent.code * numofchars + idx; pckbuckettable_storeBoundsatdepth(pckbt,&child); if (child.depth < maxdepth) { if (child.lowerbound + 1 < child.upperbound) { GT_STOREINARRAY(&stack,Pckbck_Boundsatdepth,128,child); } else { pckbuckettable_followleafedge(pckbt,fmindex,&child); } } } } GT_FREEARRAY(&stack,Pckbck_Boundsatdepth); gt_free(rangeOccs); gt_free(tmpmbtab); printf("filled: %lu (%.2f)\n",pckbt->numofvalues, (double) pckbt->numofvalues/pckbt->maxnumofvalues); return pckbt; }
static void update_trace_and_polished(Polished_point *best_polished_point, #ifndef OUTSIDE_OF_GT GtUword *minrow, GtUword *mincol, #endif Fronttrace *front_trace, const Polishing_info *pol_info, GtUword distance, GtUword trimleft, Frontvalue *midfront, Frontvalue *lowfront, Frontvalue *highfront) { const Frontvalue *frontptr; uint64_t lsb; #ifndef OUTSIDE_OF_GT *minrow = GT_UWORD_MAX; *mincol = GT_UWORD_MAX; #endif for (frontptr = lowfront; frontptr <= highfront; frontptr++) { GtUword alignedlen = GT_MULT2(frontptr->row) + FRONT_DIAGONAL(frontptr); #ifndef OUTSIDE_OF_GT GtUword currentcol; if (*minrow > frontptr->row) { *minrow = frontptr->row; } gt_assert(FRONT_DIAGONAL(frontptr) >= 0 || frontptr->row >= -FRONT_DIAGONAL(frontptr)); currentcol = frontptr->row + FRONT_DIAGONAL(frontptr); if (*mincol > currentcol) { *mincol = currentcol; } #endif lsb = frontptr->matchhistory & pol_info->mask; if (HISTORY_IS_POLISHED(pol_info,frontptr->matchhistory,lsb) && alignedlen > best_polished_point->alignedlen) { best_polished_point->alignedlen = alignedlen; best_polished_point->row = frontptr->row; best_polished_point->distance = distance; best_polished_point->trimleft = trimleft; } if (front_trace != NULL) { front_trace_add_trace(front_trace,frontptr->backreference, frontptr->localmatch_count); } } }
/*
  Expand a front consisting of the single entry *lowfront into three entries
  stored at lowfront, lowfront + 1 and lowfront + 2. All three start as
  copies of the original entry. The first is marked FT_EOP_DELETION and the
  second FT_EOP_REPLACEMENT, both with the row incremented by one; the third
  is marked FT_EOP_INSERTION and keeps its row. For each entry the match
  history is updated and add_matches() is called, in this order.
  Returns the maximum of 2 * row + diagonal over the three entries.
*/
static GtUword front_second_inplace(Frontvalue *midfront,
                                    Frontvalue *lowfront,
                                    GtUword history,
                                    Sequenceobject *useq,
                                    Sequenceobject *vseq)
{
  const uint64_t mask = ((uint64_t) 1) << (history-1);
  Frontvalue *const del_entry = lowfront,
             *const repl_entry = lowfront + 1,
             *const ins_entry = lowfront + 2;
  GtUword currentlen, longest;

  /* replicate the only entry of the previous front */
  *repl_entry = *ins_entry = *del_entry;

  del_entry->row++;
  del_entry->backreference = FT_EOP_DELETION;
  UPDATE_MATCH_HISTORY(del_entry->matchhistory_count,
                       del_entry->matchhistory);
  add_matches(midfront,del_entry,mask,useq,vseq);
  longest = GT_MULT2(del_entry->row) + FRONT_DIAGONAL(del_entry);

  repl_entry->row++;
  repl_entry->backreference = FT_EOP_REPLACEMENT;
  UPDATE_MATCH_HISTORY(repl_entry->matchhistory_count,
                       repl_entry->matchhistory);
  add_matches(midfront,repl_entry,mask,useq,vseq);
  currentlen = GT_MULT2(repl_entry->row) + FRONT_DIAGONAL(repl_entry);
  if (currentlen > longest) {
    longest = currentlen;
  }

  /* the row of this entry is not incremented */
  ins_entry->backreference = FT_EOP_INSERTION;
  UPDATE_MATCH_HISTORY(ins_entry->matchhistory_count,
                       ins_entry->matchhistory);
  add_matches(midfront,ins_entry,mask,useq,vseq);
  currentlen = GT_MULT2(ins_entry->row) + FRONT_DIAGONAL(ins_entry);
  if (currentlen > longest) {
    longest = currentlen;
  }
  return longest;
}
/*
  Return the offset of the polished point pp relative to the first valid
  diagonal of the front belonging to pp->distance. The diagonal of pp is
  alignedlen - 2 * row, the first valid diagonal is trimleft - distance.
  front_trace is only consulted by the assertions, which check that
  pp->distance is a stored generation and that the diagonal of pp lies
  in the valid range of that generation.
*/
static GtUword polished_point2offset(GT_UNUSED const GtFrontTrace *front_trace,
                                     const GtFtPolished_point *pp)
{
  GtWord first_valid_diagonal, point_diagonal;

  gt_assert(pp != NULL);
  gt_assert(pp->distance < front_trace->gen_nextfree);
  point_diagonal = (GtWord) pp->alignedlen - (GtWord) GT_MULT2(pp->row);
  first_valid_diagonal = (GtWord) pp->trimleft - (GtWord) pp->distance;
  gt_assert(first_valid_diagonal <= point_diagonal);
  gt_assert(point_diagonal <
            first_valid_diagonal +
            (GtWord) front_trace->gen_table[pp->distance].valid);
  return (GtUword) (point_diagonal - first_valid_diagonal);
}
/*
  Initialize the node bookkeeping of trierep. Space for
  2 * (numofsuffixes + 1) + 1 entries (after conversion to unsigned int) is
  allocated for the node table as well as for the table of unused nodes.
  The counters for the next free and the next unused node are set to 0,
  the root is set to NULL and numofindexes is stored.
*/
void gt_mergertrie_initnodetable(Mergertrierep *trierep,
                                 GtUword numofsuffixes,
                                 unsigned int numofindexes)
{
  /* empty trie */
  trierep->root = NULL;
  trierep->nextfreeMergertrienode = 0;
  trierep->nextunused = 0;
  trierep->numofindexes = numofindexes;

  /* both tables get the same number of entries */
  trierep->allocatedMergertrienode
    = (unsigned int) GT_MULT2(numofsuffixes + 1) + 1;
  trierep->nodetable
    = gt_malloc(sizeof *trierep->nodetable *
                trierep->allocatedMergertrienode);
  trierep->unusedMergertrienodes
    = gt_malloc(sizeof *trierep->unusedMergertrienodes *
                trierep->allocatedMergertrienode);
}
/*
  Binary search for key in a table of fixed width entries, each consisting
  of tyrindex->merbytes bytes. leftbound and rightbound point to the first
  and the last entry to be considered. offset is handed to mymemcmp() by
  address and is afterwards recorded as length for the boundary which was
  moved; it is then reduced to the length recorded for the other boundary,
  if that is smaller.
  Returns a pointer to the matching entry or NULL if there is none.
*/
/*@null@*/ const GtUchar *gt_tyrindex_binmersearch(const Tyrindex *tyrindex,
                                                   GtUword offset,
                                                   const GtUchar *key,
                                                   const GtUchar *leftbound,
                                                   const GtUchar *rightbound)
{
  const GtUchar *lo = leftbound, *hi = rightbound, *probe;
  GtUword lo_length = offset, hi_length = offset, halfentries;
  int cmp;

  while (lo <= hi) {
    /* number of entries from lo to the middle entry */
    halfentries = (GtUword) (hi-lo)/GT_MULT2(tyrindex->merbytes);
    probe = lo + tyrindex->merbytes * halfentries;
    cmp = mymemcmp(&offset,probe,key,tyrindex->merbytes);
    if (cmp == 0) {
      return probe;
    }
    if (cmp < 0) {
      /* continue in the right half */
      lo = probe + tyrindex->merbytes;
      lo_length = offset;
      if (offset > hi_length) {
        offset = hi_length;
      }
    } else {
      /* continue in the left half */
      hi = probe - tyrindex->merbytes;
      hi_length = offset;
      if (offset > lo_length) {
        offset = lo_length;
      }
    }
  }
  return NULL;
}
/*
  Return true iff a front entry has to be trimmed. With the aligned length
  of the entry being 2 * row + diagonal, this is the case if
  - the aligned length is at least minlenforhistorycheck and
    matchhistory_count is smaller than minmatchnum, or
  - the aligned length is smaller than minlenfrommaxdiff.
*/
static bool trimthisentry(Rowvaluetype row,
                          GtWord diagonal,
                          GtUword minlenforhistorycheck,
                          Matchcounttype matchhistory_count,
                          GtUword minmatchnum,
                          GtUword minlenfrommaxdiff)
{
  const GtUword alignedlen = GT_MULT2(row) + diagonal;

  return (alignedlen >= minlenforhistorycheck &&
          matchhistory_count < minmatchnum) ||
         alignedlen < minlenfrommaxdiff;
}
/*
  Stack-driven traversal which counts branching nodes and leaves.
  NOTE(review): only the beginning of this function is visible here; the
  while loop and the function body are not closed in this excerpt, so the
  handling of nodes which are not yet visited as well as the cleanup are
  not documented.

  A root element covering the bounds 0 and totallength + 1 is pushed, with
  zero leaves and one branching node. In each iteration the top element is
  inspected. If it is already marked as visited, it is popped: when the stack
  is empty afterwards, the counts of this element are logged, otherwise the
  element is passed to process_count_node() together with the stack.
*/
void gt_pck_count_nodes_dfs(const FMindex *index,
                            GtUword totallength,
                            unsigned int numofchars)
{
  GtStackNodecount stack;
  Nodecount root;
  Nodecount *current;
  Mbtab *tmpmbtab;
  GtUword *rangeOccs;
  GtUword resize = 128UL; /* TODO DW make this user definable, or dependable on
                             input data */

  GT_STACK_INIT(&stack, resize);
  /* buffers sized by the number of characters */
  rangeOccs = gt_malloc(sizeof (*rangeOccs) * GT_MULT2(numofchars));
  tmpmbtab = gt_malloc(sizeof (*tmpmbtab) * numofchars);
  root.lower = 0UL;
  root.upper = totallength + 1;
  root.leaves = 0UL;
  root.branching = 1UL;
  root.parentOffset = 0U;
  root.visited = false;
  root.on_branch = false;
  GT_STACK_PUSH(&stack, root);
  while (!GT_STACK_ISEMPTY(&stack)) {
    /* look at the top element without removing it */
    current = &(stack.space[stack.nextfree -1]);
    if (current->visited) {
      current = &(GT_STACK_POP(&stack));
      /* no parentheses around the condition: this compiles only if the
         expansion of GT_STACK_ISEMPTY is itself parenthesized */
      if GT_STACK_ISEMPTY(&stack) {
        /* TODO DW change to gt_loger_log */
        gt_log_log("on root:\n "GT_WU" branching nodes\n "GT_WU" leaves\n", current->branching, current->leaves);
      } else {
        process_count_node(&stack, current);
      }
    }
/*
  Fill the key values of fm which are derived from the given parameters:
  - the arguments bwtlength, log2bsize, log2markdist and suffixlength are
    stored as they are; mappedptr is set to NULL, mapsize to numofchars + 1,
  - log2superbsize is twice log2bsize; bsize, superbsize and markdist are
    the corresponding powers of 2; bsizehalve is half of bsize,
  - nofblocks is bwtlength / bsize + 1 and nofsuperblocks is
    bwtlength / superbsize + 2,
  - negatebsizeones and negatesuperbsizeones are the bitwise complements of
    bsize - 1 and superbsize - 1,
  - numofcodes is (mapsize - 1) to the power of suffixlength if suffixlength
    is positive, and 0 otherwise,
  - sizeofindex is delivered by determinefmindexsize(), which is called last
    as it receives fm.
*/
void gt_computefmkeyvalues (Fmindex *fm,
                            const GtSpecialcharinfo *specialcharinfo,
                            GtUword bwtlength,
                            unsigned int log2bsize,
                            unsigned int log2markdist,
                            unsigned int numofchars,
                            unsigned int suffixlength,
                            bool storeindexpos)
{
  /* values taken over from the arguments */
  fm->mappedptr = NULL;
  fm->bwtlength = bwtlength;
  fm->log2bsize = log2bsize;
  fm->log2markdist = log2markdist;
  fm->suffixlength = suffixlength;
  fm->mapsize = numofchars+1;

  /* blocks and superblocks */
  fm->log2superbsize = GT_MULT2 (fm->log2bsize);
  fm->log2superbsizeminuslog2bsize = fm->log2superbsize - fm->log2bsize;
  fm->bsize = (unsigned int) GT_POW2 (fm->log2bsize);
  fm->superbsize = (unsigned int) GT_POW2 (fm->log2superbsize);
  fm->bsizehalve = GT_DIV2(fm->bsize);
  fm->nofblocks = (GtUword) (fm->bwtlength / fm->bsize) + 1;
  fm->nofsuperblocks = (GtUword) (fm->bwtlength / fm->superbsize) + 2;
  fm->negatebsizeones = ~ (GtUword) (fm->bsize - 1);
  fm->negatesuperbsizeones = ~ (GtUword) (fm->superbsize - 1);

  /* marked positions */
  fm->markdist = (GtUword) GT_POW2 (fm->log2markdist);
  fm->markdistminus1 = (GtUword) (fm->markdist - 1);

  fm->numofcodes = (fm->suffixlength > 0)
                     ? gt_power_for_small_exponents(fm->mapsize-1,
                                                    fm->suffixlength)
                     : 0;
  fm->sizeofindex = determinefmindexsize (fm,
                                          specialcharinfo,
                                          suffixlength,
                                          storeindexpos);
}
GtUword front_prune_edist_inplace( #ifndef OUTSIDE_OF_GT bool forward, GtAllocatedMemory *frontspace, #endif Trimstat *trimstat, Polished_point *best_polished_point, Fronttrace *front_trace, const Polishing_info *pol_info, GtUword history, GtUword minmatchnum, GtUword maxalignedlendifference, FTsequenceResources *ufsr, GtUword ustart, GtUword ulen, FTsequenceResources *vfsr, GtUword vstart, GtUword vlen) { const GtUword sumseqlength = ulen + vlen, minsizeforshift = sumseqlength/1000, minlenforhistorycheck = GT_MULT2(history); /* so the space for allocating the fronts is sizeof (Frontvalue) * ((m+n)/1000 + maxvalid), where maxvalid is a small constant. */ GtUword distance, trimleft = 0, valid = 1UL, maxvalid = 0, sumvalid = 0; const uint64_t mask = ((uint64_t) 1) << (history-1); Frontvalue *validbasefront; bool diedout = false; Sequenceobject useq, vseq; #ifdef OUTSIDE_OF_GT GtAllocatedMemory *frontspace = gt_malloc(sizeof *frontspace); frontspace->space = NULL; frontspace->allocated = 0; frontspace->offset = 0; sequenceobject_init(&useq,useqptr,ustart,ulen); sequenceobject_init(&vseq,vseqptr,vstart,vlen); #else GtReadmode readmode = forward ? 
GT_READMODE_FORWARD : GT_READMODE_REVERSE; sequenceobject_init(&useq,ufsr->extend_char_access,ufsr->encseq,readmode, ustart,ulen,ufsr->encseq_r,ufsr->sequence_cache, ufsr->totallength); sequenceobject_init(&vseq,ufsr->extend_char_access,vfsr->encseq,readmode, vstart,vlen,vfsr->encseq_r,vfsr->sequence_cache, vfsr->totallength); frontspace->offset = 0; #endif #ifdef TRIM_INFO_OUT printf("regionalquality(minmatchnum)=" GT_WU "\n",minmatchnum); #endif for (distance = 0, valid = 1UL; /* Nothing */; distance++, valid += 2) { GtUword trim, maxalignedlen, minlenfrommaxdiff; #ifdef TRIM_INFO_OUT printf("distance=" GT_WU ",full=" GT_WU ",trimleft=" GT_WU ",valid=" GT_WU "\n",distance, GT_MULT2(distance) + 1, trimleft,valid); #endif gt_assert(valid <= GT_MULT2(distance) + 1); sumvalid += valid; if (maxvalid < valid) { maxvalid = valid; } validbasefront = frontspace_allocate(minsizeforshift,trimleft,valid, frontspace); if (distance == 0) { validbasefront->row = 0; validbasefront->matchhistory = 0; validbasefront->matchhistory_count = 0; validbasefront->backreference = 0; /* No back reference */ add_matches(validbasefront + distance,validbasefront,mask,&useq,&vseq); maxalignedlen = GT_MULT2(validbasefront->row); } else { gt_assert(valid >= 3UL); frontspace_check((const Frontvalue *) frontspace->space, ((const Frontvalue *) frontspace->space) + frontspace->allocated - 1, validbasefront + trimleft); frontspace_check((const Frontvalue *) frontspace->space, ((const Frontvalue *) frontspace->space) + frontspace->allocated - 1, validbasefront + trimleft + valid - 1); if (valid == 3UL) { maxalignedlen = front_second_inplace(validbasefront + distance, validbasefront + trimleft, history, &useq, &vseq); } else { maxalignedlen = front_next_inplace(validbasefront + distance, validbasefront + trimleft, validbasefront + trimleft + valid - 1, history, &useq, &vseq); } } gt_assert(valid > 0); minlenfrommaxdiff = maxalignedlen >= maxalignedlendifference ? 
maxalignedlen - maxalignedlendifference : 0; #ifdef TRIM_INFO_OUT printf("maxalignedlen=" GT_WU ",maxlenfrommaxdiff=" GT_WU "\n", maxalignedlen,minlenfrommaxdiff); #endif trim = trim_front(true, #ifdef TRIM_INFO_OUT distance, #endif ulen, vlen, minmatchnum, minlenforhistorycheck, minlenfrommaxdiff, validbasefront + distance, validbasefront + trimleft, validbasefront + trimleft + valid); #ifdef TRIM_INFO_OUT printf("trim on left=" GT_WU "\n",trim); #endif if (trim > 0) { trimleft += trim; gt_assert(valid >= trim); valid -= trim; } if (valid > 0) { trim = trim_front(false, #ifdef TRIM_INFO_OUT distance, #endif ulen, vlen, minmatchnum, minlenforhistorycheck, minlenfrommaxdiff, validbasefront + distance, validbasefront + trimleft + valid - 1, validbasefront + trimleft - 1); #ifdef TRIM_INFO_OUT printf("trim on right=" GT_WU "\n",trim); #endif gt_assert(trim < valid); if (trim > 0) { gt_assert(valid >= trim); valid -= trim; } } if (valid == 0) { diedout = true; break; } if (front_trace != NULL) { front_trace_add_gen(front_trace,trimleft,valid); } update_trace_and_polished(best_polished_point, #ifndef OUTSIDE_OF_GT &useq.min_access_pos, &vseq.min_access_pos, #endif front_trace, pol_info, distance, trimleft, validbasefront + distance, validbasefront + trimleft, validbasefront + trimleft + valid - 1); if ((vlen > ulen && vlen - ulen <= distance) || (vlen <= ulen && ulen - vlen <= distance)) { if (distance + vlen - ulen >= trimleft && distance + vlen - ulen <= trimleft + valid - 1 && validbasefront[distance + vlen - ulen].row == ulen) { break; } } if (distance >= sumseqlength) { break; } } trimstat_add(trimstat,diedout,sumvalid,maxvalid,distance, sizeof (Frontvalue) * frontspace->allocated, #ifndef OUTSIDE_OF_GT useq.sequence_cache != NULL && vseq.sequence_cache != NULL ? MAX(useq.sequence_cache->allocated, vseq.sequence_cache->allocated) : 0 #else 0 #endif ); return diedout ? sumseqlength + 1 : distance; }
/*
  Determine one GC value per sequence of encseq, which must be over a DNA
  alphabet. The positions 0..totallength-1 are scanned; the encoded
  characters for a/A/t/T are counted in at_count, those for c/C/g/G in
  gc_count. At each position which equals the end of the current sequence
  (start position plus length) and once more after the scan, the value for
  the current sequence is stored: by calculate_gc() if calculate is true,
  and as the plain gc_count otherwise.
  For a mirrored encseq the table has 2 * (number of sequences / 2) entries
  and, after the scan, entry i of the first half is copied to the entry
  with the same distance from the end of the table.
  The returned table is allocated here; err is not used.
*/
double *gt_encseq_get_gc(const GtEncseq *encseq,
                         bool with_special,
                         bool calculate,
                         GT_UNUSED GtError *err)
{
  GtEncseqReader *reader;
  GtAlphabet *alphabet;
  double *gc_content;
  /* unit = file or sequence depending on per_file */
  unsigned long pos, totallength, max_unit, seq_idx = 0, nextsep = 0,
                at_count = 0, gc_count = 0, default_count = 0;
  bool is_mirrored_encseq;
  GtUchar acgt[8], cc;

  alphabet = gt_encseq_alphabet(encseq);
  gt_assert(gt_alphabet_is_dna(alphabet));
  gt_alphabet_encode_seq(alphabet, acgt, "aAtTcCgG", 8UL);
  totallength = gt_encseq_total_length(encseq);
  reader = gt_encseq_create_reader_with_readmode(encseq,
                                                 GT_READMODE_FORWARD,
                                                 0);
  is_mirrored_encseq = gt_encseq_is_mirrored(encseq);
  if (is_mirrored_encseq) {
    max_unit = GT_DIV2(gt_encseq_num_of_sequences(encseq));
    gc_content = gt_calloc((size_t) GT_MULT2(max_unit), sizeof (double));
  } else {
    max_unit = gt_encseq_num_of_sequences(encseq);
    gc_content = gt_calloc((size_t) max_unit, sizeof (double));
  }

  nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
            gt_encseq_seqlength(encseq, seq_idx);
  for (pos = 0; pos < totallength; pos++) {
    if (pos != nextsep) {
      cc = gt_encseq_reader_next_encoded_char(reader);
      if (cc == acgt[0] || cc == acgt[1] ||
          cc == acgt[2] || cc == acgt[3]) {
        at_count++;
      } else if (cc == acgt[4] || cc == acgt[5] ||
                 cc == acgt[6] || cc == acgt[7]) {
        gc_count++;
      } else {
        default_count++;
      }
    } else {
      /* end of the current sequence: store its value, start the next one */
      if (calculate) {
        calculate_gc(encseq, gc_content, with_special, seq_idx,
                     gc_count, at_count);
      } else {
        gc_content[seq_idx] = (double) gc_count;
      }
      seq_idx++;
      nextsep = gt_encseq_seqstartpos(encseq, seq_idx) +
                gt_encseq_seqlength(encseq, seq_idx);
      gt_encseq_reader_reinit_with_readmode(reader, encseq,
                                            GT_READMODE_FORWARD,
                                            pos + 1UL);
      gc_count = at_count = default_count = 0UL;
    }
  }
  /* value of the last sequence */
  if (calculate) {
    calculate_gc(encseq, gc_content, with_special, seq_idx,
                 gc_count, at_count);
  } else {
    gc_content[seq_idx] = (double) gc_count;
  }
  gt_encseq_reader_delete(reader);

  if (is_mirrored_encseq) {
    unsigned long src, dest = GT_MULT2(max_unit);

    for (src = 0; src < max_unit; src++) {
      gc_content[--dest] = gc_content[src];
    }
  }
  return gc_content;
}
/*
  Compute the next front in place in lowfront..highfront (both inclusive).
  On entry the range holds the values of the previous front, beginning at
  lowfront. Each new entry is chosen as the candidate with the largest row
  among up to three values of the previous front, where a candidate marked
  as deletion or replacement has its row incremented by one and a candidate
  marked as insertion keeps its row. If the candidates tie, the
  backreference bits of the tied candidate are added with |=. After an entry
  is stored, add_matches() is called for it.
  As entries are overwritten while the range is traversed, the values of the
  previous front which are still needed are kept in the temporaries
  insertion_value and replacement_value.
  Returns the maximum of 2 * row + diagonal over all new entries.
*/
static GtUword front_next_inplace(Frontvalue *midfront,
                                  Frontvalue *lowfront,
                                  Frontvalue *highfront,
                                  GtUword history,
                                  Sequenceobject *useq,
                                  Sequenceobject *vseq)
{
  GtUword alignedlen, maxalignedlen;
  const uint64_t mask = ((uint64_t) 1) << (history-1);
  Frontvalue bestfront, insertion_value, replacement_value, *frontptr;

  /* first entry: its only candidate is the first value of the previous
     front */
  insertion_value = *lowfront; /* from previous diag -(d-1) => -d => DELETION */
  bestfront = insertion_value;
  bestfront.row++;
  UPDATE_MATCH_HISTORY(bestfront.matchhistory_count,bestfront.matchhistory);
  *lowfront = bestfront;
  lowfront->backreference = FT_EOP_DELETION;
  add_matches(midfront,lowfront,mask,useq,vseq);
  maxalignedlen = GT_MULT2(lowfront->row) + FRONT_DIAGONAL(lowfront);

  /* second entry: bestfront still holds the first previous value with
     incremented row and updated history; it competes with the second
     previous value, which is fetched before it is overwritten */
  replacement_value = *(lowfront+1);
  if (bestfront.row < replacement_value.row + 1) {
    bestfront = replacement_value;
    bestfront.backreference = FT_EOP_DELETION;
    bestfront.row++;
    UPDATE_MATCH_HISTORY(bestfront.matchhistory_count,bestfront.matchhistory);
  } else {
    /* history was already updated above, so only the backreference is set */
    bestfront.backreference = FT_EOP_REPLACEMENT;
    if (bestfront.row == replacement_value.row + 1) {
      bestfront.backreference |= FT_EOP_DELETION;
    }
  }
  *(lowfront+1) = bestfront;
  add_matches(midfront,lowfront + 1,mask,useq,vseq);
  alignedlen = GT_MULT2((lowfront+1)->row) + FRONT_DIAGONAL(lowfront + 1);
  if (maxalignedlen < alignedlen) {
    maxalignedlen = alignedlen;
  }

  /* remaining entries */
  for (frontptr = lowfront+2; frontptr <= highfront; frontptr++) {
    bestfront = insertion_value;
    bestfront.backreference = FT_EOP_INSERTION;
    /* the replacement candidate is not considered for the last entry */
    if (frontptr <= highfront - 1) {
      if (bestfront.row < replacement_value.row + 1) {
        bestfront = replacement_value;
        bestfront.backreference = FT_EOP_REPLACEMENT;
        bestfront.row++;
      } else {
        if (bestfront.row == replacement_value.row + 1) {
          bestfront.backreference |= FT_EOP_REPLACEMENT;
        }
      }
    }
    /* the deletion candidate, which is the value still stored at frontptr,
       is not considered for the last two entries */
    if (frontptr <= highfront - 2) {
      if (bestfront.row < frontptr->row + 1) {
        bestfront = *frontptr;
        bestfront.backreference = FT_EOP_DELETION;
        bestfront.row++;
      } else {
        if (bestfront.row == frontptr->row + 1) {
          bestfront.backreference |= FT_EOP_DELETION;
        }
      }
    }
    UPDATE_MATCH_HISTORY(bestfront.matchhistory_count,bestfront.matchhistory);
    /* shift the saved previous values before *frontptr is overwritten */
    if (frontptr < highfront) {
      insertion_value = replacement_value;
      replacement_value = *frontptr;
    }
    *frontptr = bestfront;
    add_matches(midfront,frontptr,mask,useq,vseq);
    alignedlen = GT_MULT2(frontptr->row) + FRONT_DIAGONAL(frontptr);
    if (maxalignedlen < alignedlen) {
      maxalignedlen = alignedlen;
    }
  }
  return maxalignedlen;
}
/*
  Write the given alignment to fp in three rows: the characters of u, a row
  of match/mismatch symbols, and the characters of v (with gap symbols).
  buffer must provide room for three consecutive rows of width + 1 bytes
  each; gt_alignment_show_advance() is called after each column with the
  current position, width, the buffer and fp.
  The edit operations are processed from the last entry of alignment->eops
  down to the first. If characters is not NULL, it is used to map the
  sequence values to the characters shown, and special values are shown as
  wildcardshow; otherwise the values are written as they are and downcase
  selects a case insensitive comparison.
  Matching columns which lie in the seed, i.e. with idx_u in
  [useedoffset, useedoffset + seedlen), are shown as '+' if
  alignment->seed_display is set.
  If alignment->pol_info is not NULL, a line starting with "# polishing" is
  written to stdout (not to fp) at the end.

  NOTE(review): the end of this function (its closing brace) is not part of
  this excerpt.
  NOTE(review): GT_UPDATE_POSITIVE_INFO is defined elsewhere; presumably it
  maintains prefix_positive, prefix_positive_sum, suffix_bits and
  suffix_bits_used, which are not assigned anywhere else here; confirm.
*/
void gt_alignment_show_generic(GtUchar *buffer,
                               bool downcase,
                               const GtAlignment *alignment,
                               FILE *fp,
                               unsigned int width,
                               const GtUchar *characters,
                               GtUchar wildcardshow)
{
  GtMultieop meop;
  GtUword idx_eop, idx_u = 0, idx_v = 0, meoplen, alignmentlength = 0,
          suffix_bits_used = 0, prefix_positive = 0, pol_size = 0,
          firstseedcolumn = GT_UWORD_MAX, lastseedcolumn = GT_UWORD_MAX;
  const GtUword max_history = 64;
  unsigned int pos = 0;
  GtUchar *topbuf = buffer, *midbuf = NULL, *lowbuf = NULL;
  GtWord prefix_positive_sum = 0;
  uint64_t suffix_bits = 0, set_mask = 0;

  /* NOTE(review): alignment is dereferenced here, before the assertion
     below checks it against NULL */
  if (alignment->pol_info != NULL) {
    pol_size = GT_MULT2(alignment->pol_info->cut_depth);
    /* mask selecting the most significant of the 64 bits */
    set_mask = ((uint64_t) 1) << (max_history - 1);
  }
  gt_assert(alignment != NULL && (characters == NULL || !downcase));
  /* split buffer into three rows, each terminated by a newline */
  topbuf[width] = '\n';
  midbuf = topbuf + width + 1;
  midbuf[width] = '\n';
  lowbuf = midbuf + width + 1;
  lowbuf[width] = '\n';
  meoplen = gt_multieoplist_get_num_entries(alignment->eops);
  gt_assert(meoplen > 0);
  idx_eop = meoplen - 1;
  while (true) {
    meop = gt_multieoplist_get_entry(alignment->eops, idx_eop);
    switch (meop.type) {
      GtUword j;

      case Mismatch:
      case Match:
      case Replacement:
        /* one column per step with a character of u and a character of v */
        for (j = 0; j < meop.steps && idx_u < alignment->ulen &&
                                      idx_v < alignment->vlen; j++) {
          GtUchar a = alignment->u[idx_u];
          GtUchar b = alignment->v[idx_v];
          bool is_match;

          if (characters != NULL) {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
            /* special values never count as match */
            is_match = (a == b && !ISSPECIAL(a)) ? true : false;
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else {
            topbuf[pos] = a;
            is_match = ((downcase && tolower((int) a) == tolower((int) b)) ||
                        (!downcase && a == b)) ? true : false;
            lowbuf[pos] = b;
          }
          if (is_match) {
            if (alignment->useedoffset <= idx_u &&
                idx_u < alignment->useedoffset + alignment->seedlen) {
              if (alignment->seed_display) {
                midbuf[pos] = (GtUchar) '+';
              } else {
                midbuf[pos] = (GtUchar) MATCHSYMBOL;
              }
              /* remember first and last matching column inside the seed */
              if (firstseedcolumn == GT_UWORD_MAX) {
                firstseedcolumn = alignmentlength;
              }
              lastseedcolumn = alignmentlength;
            } else {
              midbuf[pos] = (GtUchar) MATCHSYMBOL;
            }
          } else {
            midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          }
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(is_match);
          alignmentlength++;
          idx_u++;
          idx_v++;
        }
        break;
      case Deletion:
        /* one column per step with a character of u above a gap */
        for (j = 0; j < meop.steps && idx_u < alignment->ulen; j++) {
          GtUchar a = alignment->u[idx_u++];

          if (characters != NULL) {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
          } else {
            topbuf[pos] = a;
          }
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          lowbuf[pos] = (GtUchar) GAPSYMBOL;
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      case Insertion:
        /* one column per step with a gap above a character of v */
        for (j = 0; j < meop.steps && idx_v < alignment->vlen; j++) {
          GtUchar b = alignment->v[idx_v++];

          topbuf[pos] = (GtUchar) GAPSYMBOL;
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          if (characters != NULL) {
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else {
            lowbuf[pos] = b;
          }
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
    }
    /* continue with the preceding entry as long as there is one and at
       least one of the sequences is not completely consumed */
    if (idx_eop > 0 && (idx_u < alignment->ulen || idx_v < alignment->vlen)) {
      idx_eop--;
    } else {
      break;
    }
  }
  /* write the columns which were not yet written */
  if (pos > 0) {
    topbuf[pos] = '\n';
    fwrite(topbuf,sizeof *topbuf,pos+1,fp);
    midbuf[pos] = '\n';
    fwrite(midbuf,sizeof *midbuf,pos+1,fp);
    lowbuf[pos] = '\n';
    fwrite(lowbuf,sizeof *lowbuf,pos+1,fp);
  }
  if (alignment->pol_info != NULL) {
    GtUword suffix_positive;
    GtWord suffix_positive_sum = 0;
    bool startpolished = false, endpolished = false;

    /* scan the recorded bits beginning with the most significant one and
       stop as soon as the running score sum becomes negative */
    for (suffix_positive = 0; suffix_positive < suffix_bits_used;
         suffix_positive++) {
      suffix_positive_sum += ((suffix_bits & set_mask)
                                ? alignment->pol_info->match_score
                                : -alignment->pol_info->difference_score);
      if (suffix_positive_sum < 0) {
        break;
      }
      set_mask >>= 1;
    }
    /* NOTE(review): both operands of this assertion are identical; the
       second was possibly meant to check suffix_positive */
    gt_assert(prefix_positive <= alignmentlength &&
              prefix_positive <= alignmentlength);
    if (prefix_positive >= pol_size || prefix_positive == alignmentlength ||
        firstseedcolumn < pol_size) {
      startpolished = true;
    }
    if (suffix_positive >= pol_size || suffix_positive == alignmentlength ||
        (lastseedcolumn != GT_UWORD_MAX &&
         lastseedcolumn + pol_size > alignmentlength)) {
      endpolished = true;
    }
    printf("# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU
           "): " GT_WU "/" GT_WU,
            alignment->pol_info->match_score,
            -alignment->pol_info->difference_score,
            pol_size,
            prefix_positive,
            suffix_positive);
    if (firstseedcolumn < pol_size) {
      printf(", seed_on_start");
    }
    /* NOTE(review): unlike in the test for endpolished above, lastseedcolumn
       is not compared with GT_UWORD_MAX here, so the sum can wrap around */
    if (lastseedcolumn + pol_size > alignmentlength) {
      printf(", seed_on_end");
    }
    if (alignment->withpolcheck) {
      printf("\n");
      gt_assert(startpolished && endpolished);
    } else {
      if (!startpolished) {
        printf(", start not polished");
      }
      if (!endpolished) {
        printf(", end not polished");
      }
      printf("\n");
    }
  }
/*
  Trace back from the polished point pp through the backreferences stored in
  front_trace, from distance pp->distance down to distance 0.
  If eoplist is not NULL, the edit operations found are added to it: for
  each step the match run of length lcs (if positive), followed by one
  deletion, insertion or mismatch. If eoplist is NULL, nothing is stored
  and gt_check_diagonal_run() is called for each match run instead.
  Whenever the backreference bits of an entry contain the edit operation
  chosen in the previous step (initially FT_EOP_MISMATCH), that operation is
  chosen again. Otherwise the first operation present in the bits is
  chosen in the order mismatch, insertion, deletion.
  ulen and vlen are only used in assertions.
*/
static void front_trace2eoplist_directed(GtEoplist *eoplist,
                                         const GtFrontTrace *front_trace,
                                         const GtUchar *useq,
                                         GT_UNUSED GtUword ulen,
                                         const GtUchar *vseq,
                                         GT_UNUSED GtUword vlen,
                                         const GtFtPolished_point *pp)
{
  GtUword distance, localoffset, globaloffset, remainingvalidfronts,
          totalrunlength = 0, trimleft;
  GtWord diagonal;
  unsigned int row, lcs;
  uint8_t trace, preferred_eop = FT_EOP_MISMATCH;

  gt_assert(front_trace != NULL && front_trace->gen_nextfree > 0 &&
            pp != NULL);
  /* locate the entry of pp in backref_table: globaloffset is the index at
     which the front of pp->distance begins, localoffset is the index of
     the entry relative to that */
  localoffset = polished_point2offset(front_trace,pp);
  remainingvalidfronts = valid_total_fronts(front_trace->gen_table,
                                            pp->distance,
                                            front_trace->gen_nextfree);
  gt_assert(remainingvalidfronts <= front_trace->backref_nextfree);
  globaloffset = front_trace->backref_nextfree - remainingvalidfronts;
  distance = pp->distance;
  diagonal = (GtWord) pp->alignedlen - (GtWord) GT_MULT2(pp->row);
  trace = front_trace->backref_table[globaloffset + localoffset].bits;
  lcs = front_trace->backref_table[globaloffset + localoffset].lcs;
  row = pp->row;
  trimleft = pp->trimleft;
  gt_assert(distance < front_trace->gen_nextfree);
  while (distance > 0) {
    GtUword nextrowadd; /* number of rows consumed by the edit operation */
    GtWord base_diagonal;

    if (eoplist != NULL) {
      if (lcs > 0) {
        gt_eoplist_match_add(eoplist,lcs);
      }
    } else {
      gt_check_diagonal_run(useq, vseq, diagonal, row - lcs, row);
    }
    if (trace & preferred_eop) {
      /* continue with the same kind of edit operation */
      totalrunlength++;
      if (preferred_eop == FT_EOP_MISMATCH) {
        nextrowadd = 1;
      } else {
        if (preferred_eop == FT_EOP_INSERTION) {
          gt_assert(-(GtWord) ulen < diagonal);
          diagonal--;
          nextrowadd = 0;
        } else {
          gt_assert(preferred_eop == FT_EOP_DELETION);
          gt_assert(diagonal < (GtWord) vlen);
          diagonal++;
          nextrowadd = 1;
        }
      }
    } else {
      /* switch to the first edit operation present in trace */
      if (trace & FT_EOP_MISMATCH) {
        preferred_eop = FT_EOP_MISMATCH;
        nextrowadd = 1;
      } else {
        if (trace & FT_EOP_INSERTION) {
          gt_assert(-(GtWord) ulen < diagonal);
          diagonal--;
          preferred_eop = FT_EOP_INSERTION;
          nextrowadd = 0;
        } else {
          gt_assert(trace & FT_EOP_DELETION);
          gt_assert(diagonal < (GtWord) vlen);
          diagonal++;
          preferred_eop = FT_EOP_DELETION;
          nextrowadd = 1;
        }
      }
    }
    if (eoplist != NULL) {
      if (preferred_eop == FT_EOP_DELETION) {
        gt_eoplist_deletion_add(eoplist);
      } else {
        if (preferred_eop == FT_EOP_INSERTION) {
          gt_eoplist_insertion_add(eoplist);
        } else {
          gt_eoplist_mismatch_add(eoplist);
        }
      }
    }
    /* move to the front of the previous distance: undo the trimleft
       difference of the current generation, then recompute both offsets */
    gt_assert(trimleft >=
              (GtUword) front_trace->gen_table[distance].trimleft_diff);
    trimleft -= (GtUword) front_trace->gen_table[distance].trimleft_diff;
    distance--;
    base_diagonal = (GtWord) trimleft - (GtWord) distance;
    gt_assert(base_diagonal <= diagonal);
    gt_assert(diagonal <
              base_diagonal + (GtWord) front_trace->gen_table[distance].valid);
    localoffset = (GtUword) (diagonal - base_diagonal);
    gt_assert((GtUword) front_trace->gen_table[distance].valid
              <= globaloffset);
    globaloffset -= (GtUword) front_trace->gen_table[distance].valid;
    gt_assert(row >= lcs + nextrowadd);
    row -= lcs + nextrowadd;
    trace = front_trace->backref_table[globaloffset + localoffset].bits;
    lcs = front_trace->backref_table[globaloffset + localoffset].lcs;
  }
  /*printf("avg runlength=%.2f\n",(double) pp->distance/totalrunlength);*/
  /* the backtrace must end in the first entry, which has no backreference */
  gt_assert(globaloffset + localoffset == 0 && trace == 0);
  if (eoplist != NULL && lcs > 0) {
    gt_eoplist_match_add(eoplist,lcs);
  }
}
static void front_trace2polished_eoplist(GtEoplist *eoplist, GtFrontTrace *front_trace, const GtFtPolished_point *pp, GtUword pol_size, GtWord match_score, GtWord difference_score, const GtUchar *useq, GtUword ulen, const GtUchar *vseq, GtUword vlen) { GtUword localoffset, globaloffset, remainingvalidfronts; GtBacktraceFrontStackelem *stack_top_ptr; GtBacktraceFrontInfo bti; unsigned int lastlcs; bti.ulen = ulen; bti.vlen = vlen; bti.match_score = match_score; bti.difference_score = difference_score; bti.on_polsize_suffix = true; front_trace->backtracestack.nextfree = 0; if (front_trace->backtracepath_allocated < pp->distance+1) { front_trace->backtracepath_allocated = pp->distance + 1; front_trace->backtracepath = gt_realloc(front_trace->backtracepath, sizeof *front_trace->backtracepath * (pp->distance+1)); } gt_assert(front_trace != NULL && front_trace->gen_nextfree > 0 && pp != NULL); localoffset = polished_point2offset(front_trace,pp); remainingvalidfronts = valid_total_fronts(front_trace->gen_table, pp->distance, front_trace->gen_nextfree); gt_assert(remainingvalidfronts <= front_trace->backref_nextfree); globaloffset = front_trace->backref_nextfree - remainingvalidfronts; stack_top_ptr = stack_top_ptr_get(&front_trace->backtracestack); stack_top_ptr->diagonal = (GtWord) pp->alignedlen - (GtWord) GT_MULT2(pp->row); stack_top_ptr->distance = pp->distance; stack_top_ptr->trace = front_trace->backref_table[globaloffset + localoffset].bits; stack_top_ptr->row = pp->row; stack_top_ptr->eopcode = 0; lastlcs = stack_top_ptr->lcs = front_trace->backref_table[globaloffset + localoffset].lcs; stack_top_ptr->scoresum = stack_top_ptr->lcs * match_score; stack_top_ptr->globaloffset = globaloffset; stack_top_ptr->trimleft = pp->trimleft; stack_top_ptr->lcs_sum = stack_top_ptr->lcs; stack_top_ptr->pathlength = 0; /* number of errors */ while (front_trace->backtracestack.nextfree > 0) { front_trace->backtracestack.nextfree--; stack_top_ptr = front_trace->backtracestack.space 
+ front_trace->backtracestack.nextfree; if (bti.on_polsize_suffix && stack_top_ptr->lcs_sum + stack_top_ptr->pathlength >= pol_size) { bti.on_polsize_suffix = false; } if (stack_top_ptr->pathlength > 0) { gt_assert(stack_top_ptr->pathlength - 1 <= pp->distance); front_trace->backtracepath[stack_top_ptr->pathlength-1].eopcode = stack_top_ptr->eopcode; front_trace->backtracepath[stack_top_ptr->pathlength-1].lcs = stack_top_ptr->lcs; } if (stack_top_ptr->trace != 0) { if (eoplist == NULL) { gt_check_diagonal_run(useq, vseq, stack_top_ptr->diagonal, stack_top_ptr->row - stack_top_ptr->lcs, stack_top_ptr->row); } gt_front_trace_backtrace_step(&bti, front_trace, stack_top_ptr->diagonal, stack_top_ptr->scoresum, stack_top_ptr->distance, stack_top_ptr->trace, stack_top_ptr->globaloffset, stack_top_ptr->trimleft, stack_top_ptr->row, stack_top_ptr->lcs, stack_top_ptr->lcs_sum, stack_top_ptr->pathlength); } else { /* trace == 0 */ break; } } gt_assert(stack_top_ptr != NULL); if (eoplist != NULL) { gt_front_trace_backtracepath2eoplist(eoplist, lastlcs, front_trace->backtracepath, stack_top_ptr->pathlength, ulen, vlen); } }
/*
  Write the alignment described by eoplist to fp in three rows: the
  characters of useq, a row of match/mismatch symbols, and the characters of
  vseq (with gap symbols). The rows are assembled in
  eoplist_reader->outbuffer, which is used as three consecutive rows of
  eoplist_reader->width + 1 bytes each; gt_eoplist_show_advance() is called
  after each column with the current position, the width, the buffer and fp.
  The edit operations are delivered by gt_eoplist_reader_next_cigar() after
  the reader has been reset; if distinguish_mismatch_match is true,
  gt_eoplist_reader_distinguish_mismatch_match() is called before.
  If characters is not NULL, it is used to map the sequence values to the
  characters shown, and special values are shown as wildcardshow.
  Matching columns which lie in the seed, i.e. with idx_u in
  [useedoffset, useedoffset + seedlen), are shown as '+' if
  eoplist->seed_display is set.
  Unless OUTSIDE_OF_GT is defined and if eoplist->pol_info is not NULL, a
  line starting with "# polishing" is written to fp at the end.
  An operation of unknown type leads to an error message on stderr and
  exit(GT_EXIT_PROGRAMMING_ERROR).

  NOTE(review): the end of this function (the #endif and the closing brace)
  is not part of this excerpt.
  NOTE(review): GT_UPDATE_POSITIVE_INFO is defined elsewhere; presumably it
  maintains prefix_positive, prefix_positive_sum, suffix_bits and
  suffix_bits_used, which are not assigned anywhere else here; confirm.
*/
void gt_eoplist_format_generic(FILE *fp,
                               const GtEoplist *eoplist,
                               GtEoplistReader *eoplist_reader,
                               bool distinguish_mismatch_match,
                               const GtUchar *characters,
                               GtUchar wildcardshow)
{
  GtCigarOp co;
  unsigned int pos = 0;
  GtUword idx_u = 0, idx_v = 0, alignmentlength = 0,
          firstseedcolumn = GT_UWORD_MAX;
  /* NOTE(review): eoplist_reader is dereferenced here, before the assertion
     below checks it against NULL */
  GtUchar *topbuf = eoplist_reader->outbuffer, *midbuf = NULL, *lowbuf = NULL;
#ifndef OUTSIDE_OF_GT
  uint64_t suffix_bits = 0, set_mask = 0;
  GtUword suffix_bits_used = 0, prefix_positive = 0, pol_size = 0,
          lastseedcolumn = GT_UWORD_MAX;
  const GtUword max_history = 64;
  GtWord prefix_positive_sum = 0;

  if (eoplist->pol_info != NULL) {
    pol_size = GT_MULT2(eoplist->pol_info->cut_depth);
    /* mask selecting the most significant of the 64 bits */
    set_mask = ((uint64_t) 1) << (max_history - 1);
  }
#endif
  gt_assert(eoplist_reader != NULL);
  /* split the buffer into three rows, each terminated by a newline */
  topbuf[eoplist_reader->width] = '\n';
  midbuf = topbuf + eoplist_reader->width + 1;
  midbuf[eoplist_reader->width] = '\n';
  lowbuf = midbuf + eoplist_reader->width + 1;
  lowbuf[eoplist_reader->width] = '\n';
  gt_eoplist_reader_reset(eoplist_reader,eoplist);
  if (distinguish_mismatch_match) {
    gt_eoplist_reader_distinguish_mismatch_match(eoplist_reader);
  }
  while (gt_eoplist_reader_next_cigar(&co,eoplist_reader)) {
    switch (co.eoptype) {
      GtUword j;
      GtUchar cc_a, cc_b;

      case GtMatchOp:
      case GtMismatchOp:
        /* one column per iteration with a character of useq and a character
           of vseq; whether they match is decided by comparing them */
        for (j = 0; j < co.iteration && idx_u < eoplist->ulen &&
                                        idx_v < eoplist->vlen; j++) {
          cc_a = eoplist->useq[idx_u];
          cc_b = eoplist->vseq[idx_v];
          bool is_match;

          if (characters != NULL) {
            topbuf[pos] = ISSPECIAL(cc_a) ? wildcardshow : characters[cc_a];
            lowbuf[pos] = ISSPECIAL(cc_b) ? wildcardshow : characters[cc_b];
            /* special values never count as match */
            is_match = (cc_a == cc_b && !ISSPECIAL(cc_a)) ? true : false;
          } else {
            topbuf[pos] = cc_a;
            is_match = (cc_a == cc_b) ? true : false;
            lowbuf[pos] = cc_b;
          }
          if (is_match) {
            if (eoplist->useedoffset <= idx_u &&
                idx_u < eoplist->useedoffset + eoplist->seedlen) {
              if (eoplist->seed_display) {
                midbuf[pos] = (GtUchar) '+';
              } else {
                midbuf[pos] = (GtUchar) EOPLIST_MATCHSYMBOL;
              }
              /* remember first and last matching column inside the seed */
              if (firstseedcolumn == GT_UWORD_MAX) {
                firstseedcolumn = alignmentlength;
              }
#ifndef OUTSIDE_OF_GT
              lastseedcolumn = alignmentlength;
#endif
            } else {
              midbuf[pos] = (GtUchar) EOPLIST_MATCHSYMBOL;
            }
          } else {
            midbuf[pos] = (GtUchar) EOPLIST_MISMATCHSYMBOL;
          }
          pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(is_match);
          alignmentlength++;
          idx_u++;
          idx_v++;
        }
        break;
      case GtDeletionOp:
        /* one column per iteration with a character of useq above a gap */
        for (j = 0; j < co.iteration && idx_u < eoplist->ulen; j++) {
          cc_a = eoplist->useq[idx_u++];
          if (characters != NULL) {
            topbuf[pos] = ISSPECIAL(cc_a) ? wildcardshow : characters[cc_a];
          } else {
            topbuf[pos] = cc_a;
          }
          midbuf[pos] = EOPLIST_MISMATCHSYMBOL;
          lowbuf[pos] = EOPLIST_GAPSYMBOL;
          pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      case GtInsertionOp:
        /* one column per iteration with a gap above a character of vseq */
        for (j = 0; j < co.iteration && idx_v < eoplist->vlen; j++) {
          cc_b = eoplist->vseq[idx_v++];
          topbuf[pos] = EOPLIST_GAPSYMBOL;
          midbuf[pos] = EOPLIST_MISMATCHSYMBOL;
          if (characters != NULL) {
            lowbuf[pos] = ISSPECIAL(cc_b) ? wildcardshow : characters[cc_b];
          } else {
            lowbuf[pos] = cc_b;
          }
          pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      default:
        fprintf(stderr,"file %s, line %d: illegal eoptype %d\n", __FILE__,__LINE__,co.eoptype);
        exit(GT_EXIT_PROGRAMMING_ERROR);
    }
  }
  /* write the columns which were not yet written */
  if (pos > 0) {
    topbuf[pos] = '\n';
    fwrite(topbuf,sizeof *topbuf,pos+1,fp);
    midbuf[pos] = '\n';
    fwrite(midbuf,sizeof *midbuf,pos+1,fp);
    lowbuf[pos] = '\n';
    fwrite(lowbuf,sizeof *lowbuf,pos+1,fp);
  }
#ifndef OUTSIDE_OF_GT
  if (eoplist->pol_info != NULL) {
    GtUword suffix_positive;
    GtWord suffix_positive_sum = 0;
    bool startpolished = false, endpolished = false;

    /* scan the recorded bits beginning with the most significant one and
       stop as soon as the running score sum becomes negative */
    for (suffix_positive = 0; suffix_positive < suffix_bits_used;
         suffix_positive++) {
      suffix_positive_sum += ((suffix_bits & set_mask)
                                ? eoplist->pol_info->match_score
                                : -eoplist->pol_info->difference_score);
      if (suffix_positive_sum < 0) {
        break;
      }
      set_mask >>= 1;
    }
    gt_assert(prefix_positive <= alignmentlength);
    if (prefix_positive >= pol_size || prefix_positive == alignmentlength ||
        firstseedcolumn < pol_size) {
      startpolished = true;
    }
    if (suffix_positive >= pol_size || suffix_positive == alignmentlength ||
        (lastseedcolumn != GT_UWORD_MAX &&
         lastseedcolumn + pol_size > alignmentlength)) {
      endpolished = true;
    }
    fprintf(fp, "# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU
            "): " GT_WU "/" GT_WU,
            eoplist->pol_info->match_score,
            -eoplist->pol_info->difference_score,
            pol_size,
            prefix_positive,
            suffix_positive);
    if (firstseedcolumn < pol_size) {
      fprintf(fp, ", seed_on_start");
    }
    /* NOTE(review): unlike in the test for endpolished above, lastseedcolumn
       is not compared with GT_UWORD_MAX here, so the sum can wrap around */
    if (lastseedcolumn + pol_size > alignmentlength) {
      fprintf(fp, ", seed_on_end");
    }
    if (eoplist->withpolcheck) {
      fprintf(fp, "\n");
      gt_assert(startpolished);
      gt_assert(endpolished);
    } else {
      if (!startpolished) {
        fprintf(fp, ", start not polished");
      }
      if (!endpolished) {
        fprintf(fp, ", end not polished");
      }
      fprintf(fp, "\n");
    }
  }