/*
  Write <alignment> to <fp> as blocks of three text rows: the symbols of
  sequence u (top), a match/mismatch marker row (middle) and the symbols of
  sequence v (bottom). Each row holds at most <width> columns followed by a
  newline.

  buffer:       scratch space for the three rows. The rows are laid out
                back to back, each <width> + 1 bytes long, so <buffer> must
                provide at least 3 * (<width> + 1) bytes.
  downcase:     only allowed when <characters> is NULL; symbols are then
                compared case-insensitively via tolower().
  alignment:    the alignment to show; must not be NULL and must contain at
                least one edit operation.
  fp:           destination stream for the rows and for the polishing
                summary line.
  width:        number of alignment columns per output row.
  characters:   if not NULL, a table indexed by sequence symbol that yields
                the character to print; symbols for which ISSPECIAL() holds
                are printed as <wildcardshow> and never count as a match.
                If NULL, the sequence symbols are printed as they are.
  wildcardshow: character printed for special symbols.

  The edit operations are processed from the last list entry down to the
  first one. Matching columns whose u-position lies inside
  [useedoffset, useedoffset + seedlen) are marked with '+' if seed_display is
  set. If alignment->pol_info is not NULL, a line starting with
  "# polishing(" is appended which reports the polished prefix and suffix
  lengths.
*/
void gt_alignment_show_generic(GtUchar *buffer,
                               bool downcase,
                               const GtAlignment *alignment,
                               FILE *fp,
                               unsigned int width,
                               const GtUchar *characters,
                               GtUchar wildcardshow)
{
  /* NOTE(review): GT_UPDATE_POSITIVE_INFO is defined outside of this
     function and presumably refers to several of the following locals by
     name; do not rename them without checking the macro. */
  GtMultieop meop;
  GtUword idx_eop, idx_u = 0, idx_v = 0, meoplen, j,
          alignmentlength = 0, suffix_bits_used = 0,
          prefix_positive = 0, pol_size = 0,
          firstseedcolumn = GT_UWORD_MAX,
          lastseedcolumn = GT_UWORD_MAX;
  const GtUword max_history = 64;
  unsigned int pos = 0;
  GtUchar *topbuf = buffer, *midbuf = NULL, *lowbuf = NULL;
  GtWord prefix_positive_sum = 0;
  uint64_t suffix_bits = 0, set_mask = 0;

  /* check the arguments before <alignment> is dereferenced */
  gt_assert(alignment != NULL);
  gt_assert(characters == NULL || !downcase);

  if (alignment->pol_info != NULL)
  {
    pol_size = GT_MULT2(alignment->pol_info->cut_depth);
    /* highest bit of the 64-bit match history */
    set_mask = ((uint64_t) 1) << (max_history - 1);
  }

  /* split <buffer> into three rows, each terminated by a newline */
  topbuf[width] = '\n';
  midbuf = topbuf + width + 1;
  midbuf[width] = '\n';
  lowbuf = midbuf + width + 1;
  lowbuf[width] = '\n';

  meoplen = gt_multieoplist_get_num_entries(alignment->eops);
  gt_assert(meoplen > 0);
  idx_eop = meoplen - 1;
  while (true)
  {
    meop = gt_multieoplist_get_entry(alignment->eops, idx_eop);
    switch (meop.type)
    {
      case Mismatch:
      case Match:
      case Replacement:
        for (j = 0; j < meop.steps && idx_u < alignment->ulen &&
                    idx_v < alignment->vlen; j++)
        {
          const GtUchar a = alignment->u[idx_u];
          const GtUchar b = alignment->v[idx_v];
          bool is_match;

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
            is_match = (a == b && !ISSPECIAL(a));
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            topbuf[pos] = a;
            is_match = downcase ? (tolower((int) a) == tolower((int) b))
                                : (a == b);
            lowbuf[pos] = b;
          }
          if (is_match)
          {
            if (alignment->useedoffset <= idx_u &&
                idx_u < alignment->useedoffset + alignment->seedlen)
            {
              /* matching column inside the seed */
              midbuf[pos] = alignment->seed_display
                              ? (GtUchar) '+'
                              : (GtUchar) MATCHSYMBOL;
              if (firstseedcolumn == GT_UWORD_MAX)
              {
                firstseedcolumn = alignmentlength;
              }
              lastseedcolumn = alignmentlength;
            } else
            {
              midbuf[pos] = (GtUchar) MATCHSYMBOL;
            }
          } else
          {
            midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          }
          /* NOTE(review): helper is defined elsewhere; presumably it writes
             the three rows once they are full and returns the next column */
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(is_match);
          alignmentlength++;
          idx_u++;
          idx_v++;
        }
        break;
      case Deletion:
        for (j = 0; j < meop.steps && idx_u < alignment->ulen; j++)
        {
          const GtUchar a = alignment->u[idx_u++];

          if (characters != NULL)
          {
            topbuf[pos] = ISSPECIAL(a) ? wildcardshow : characters[a];
          } else
          {
            topbuf[pos] = a;
          }
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          lowbuf[pos] = (GtUchar) GAPSYMBOL;
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
      case Insertion:
        for (j = 0; j < meop.steps && idx_v < alignment->vlen; j++)
        {
          const GtUchar b = alignment->v[idx_v++];

          topbuf[pos] = (GtUchar) GAPSYMBOL;
          midbuf[pos] = (GtUchar) MISMATCHSYMBOL;
          if (characters != NULL)
          {
            lowbuf[pos] = ISSPECIAL(b) ? wildcardshow : characters[b];
          } else
          {
            lowbuf[pos] = b;
          }
          pos = gt_alignment_show_advance(pos,width,topbuf,fp);
          GT_UPDATE_POSITIVE_INFO(false);
          alignmentlength++;
        }
        break;
    }
    /* continue with the preceding list entry while there are entries and
       unprocessed sequence positions left */
    if (idx_eop > 0 && (idx_u < alignment->ulen || idx_v < alignment->vlen))
    {
      idx_eop--;
    } else
    {
      break;
    }
  }

  /* write the last, only partially filled block of rows */
  if (pos > 0)
  {
    topbuf[pos] = '\n';
    fwrite(topbuf,sizeof *topbuf,pos+1,fp);
    midbuf[pos] = '\n';
    fwrite(midbuf,sizeof *midbuf,pos+1,fp);
    lowbuf[pos] = '\n';
    fwrite(lowbuf,sizeof *lowbuf,pos+1,fp);
  }

  if (alignment->pol_info != NULL)
  {
    GtUword suffix_positive;
    GtWord suffix_positive_sum = 0;
    bool startpolished = false, endpolished = false;
    /* without the explicit test for GT_UWORD_MAX (= no seed column seen)
       the unsigned sum wraps around and may wrongly report a seed at the
       end of the alignment */
    const bool seed_on_end = lastseedcolumn != GT_UWORD_MAX &&
                             lastseedcolumn + pol_size > alignmentlength;

    /* inspect the history bits from the highest bit downwards: a set bit
       adds match_score, an unset bit subtracts difference_score; stop as
       soon as the running sum becomes negative */
    for (suffix_positive = 0; suffix_positive < suffix_bits_used;
         suffix_positive++)
    {
      suffix_positive_sum += ((suffix_bits & set_mask)
                                ? alignment->pol_info->match_score
                                : -alignment->pol_info->difference_score);
      if (suffix_positive_sum < 0)
      {
        break;
      }
      set_mask >>= 1;
    }
    gt_assert(prefix_positive <= alignmentlength);
    if (prefix_positive >= pol_size || prefix_positive == alignmentlength ||
        firstseedcolumn < pol_size)
    {
      startpolished = true;
    }
    if (suffix_positive >= pol_size || suffix_positive == alignmentlength ||
        seed_on_end)
    {
      endpolished = true;
    }
    /* the summary goes to the same stream as the alignment rows */
    fprintf(fp, "# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU
                "): " GT_WU "/" GT_WU,
            alignment->pol_info->match_score,
            -alignment->pol_info->difference_score,
            pol_size,
            prefix_positive,
            suffix_positive);
    if (firstseedcolumn < pol_size)
    {
      fprintf(fp, ", seed_on_start");
    }
    if (seed_on_end)
    {
      fprintf(fp, ", seed_on_end");
    }
    if (alignment->withpolcheck)
    {
      fprintf(fp, "\n");
      gt_assert(startpolished && endpolished);
    } else
    {
      if (!startpolished)
      {
        fprintf(fp, ", start not polished");
      }
      if (!endpolished)
      {
        fprintf(fp, ", end not polished");
      }
      fprintf(fp, "\n");
    }
  }
}
void gt_eoplist_format_generic(FILE *fp, const GtEoplist *eoplist, GtEoplistReader *eoplist_reader, bool distinguish_mismatch_match, const GtUchar *characters, GtUchar wildcardshow) { GtCigarOp co; unsigned int pos = 0; GtUword idx_u = 0, idx_v = 0, alignmentlength = 0, firstseedcolumn = GT_UWORD_MAX; GtUchar *topbuf = eoplist_reader->outbuffer, *midbuf = NULL, *lowbuf = NULL; #ifndef OUTSIDE_OF_GT uint64_t suffix_bits = 0, set_mask = 0; GtUword suffix_bits_used = 0, prefix_positive = 0, pol_size = 0, lastseedcolumn = GT_UWORD_MAX; const GtUword max_history = 64; GtWord prefix_positive_sum = 0; if (eoplist->pol_info != NULL) { pol_size = GT_MULT2(eoplist->pol_info->cut_depth); set_mask = ((uint64_t) 1) << (max_history - 1); } #endif gt_assert(eoplist_reader != NULL); topbuf[eoplist_reader->width] = '\n'; midbuf = topbuf + eoplist_reader->width + 1; midbuf[eoplist_reader->width] = '\n'; lowbuf = midbuf + eoplist_reader->width + 1; lowbuf[eoplist_reader->width] = '\n'; gt_eoplist_reader_reset(eoplist_reader,eoplist); if (distinguish_mismatch_match) { gt_eoplist_reader_distinguish_mismatch_match(eoplist_reader); } while (gt_eoplist_reader_next_cigar(&co,eoplist_reader)) { switch (co.eoptype) { GtUword j; GtUchar cc_a, cc_b; case GtMatchOp: case GtMismatchOp: for (j = 0; j < co.iteration && idx_u < eoplist->ulen && idx_v < eoplist->vlen; j++) { cc_a = eoplist->useq[idx_u]; cc_b = eoplist->vseq[idx_v]; bool is_match; if (characters != NULL) { topbuf[pos] = ISSPECIAL(cc_a) ? wildcardshow : characters[cc_a]; lowbuf[pos] = ISSPECIAL(cc_b) ? wildcardshow : characters[cc_b]; is_match = (cc_a == cc_b && !ISSPECIAL(cc_a)) ? true : false; } else { topbuf[pos] = cc_a; is_match = (cc_a == cc_b) ? 
true : false; lowbuf[pos] = cc_b; } if (is_match) { if (eoplist->useedoffset <= idx_u && idx_u < eoplist->useedoffset + eoplist->seedlen) { if (eoplist->seed_display) { midbuf[pos] = (GtUchar) '+'; } else { midbuf[pos] = (GtUchar) EOPLIST_MATCHSYMBOL; } if (firstseedcolumn == GT_UWORD_MAX) { firstseedcolumn = alignmentlength; } #ifndef OUTSIDE_OF_GT lastseedcolumn = alignmentlength; #endif } else { midbuf[pos] = (GtUchar) EOPLIST_MATCHSYMBOL; } } else { midbuf[pos] = (GtUchar) EOPLIST_MISMATCHSYMBOL; } pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp); GT_UPDATE_POSITIVE_INFO(is_match); alignmentlength++; idx_u++; idx_v++; } break; case GtDeletionOp: for (j = 0; j < co.iteration && idx_u < eoplist->ulen; j++) { cc_a = eoplist->useq[idx_u++]; if (characters != NULL) { topbuf[pos] = ISSPECIAL(cc_a) ? wildcardshow : characters[cc_a]; } else { topbuf[pos] = cc_a; } midbuf[pos] = EOPLIST_MISMATCHSYMBOL; lowbuf[pos] = EOPLIST_GAPSYMBOL; pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp); GT_UPDATE_POSITIVE_INFO(false); alignmentlength++; } break; case GtInsertionOp: for (j = 0; j < co.iteration && idx_v < eoplist->vlen; j++) { cc_b = eoplist->vseq[idx_v++]; topbuf[pos] = EOPLIST_GAPSYMBOL; midbuf[pos] = EOPLIST_MISMATCHSYMBOL; if (characters != NULL) { lowbuf[pos] = ISSPECIAL(cc_b) ? 
wildcardshow : characters[cc_b]; } else { lowbuf[pos] = cc_b; } pos = gt_eoplist_show_advance(pos,eoplist_reader->width,topbuf,fp); GT_UPDATE_POSITIVE_INFO(false); alignmentlength++; } break; default: fprintf(stderr,"file %s, line %d: illegal eoptype %d\n", __FILE__,__LINE__,co.eoptype); exit(GT_EXIT_PROGRAMMING_ERROR); } } if (pos > 0) { topbuf[pos] = '\n'; fwrite(topbuf,sizeof *topbuf,pos+1,fp); midbuf[pos] = '\n'; fwrite(midbuf,sizeof *midbuf,pos+1,fp); lowbuf[pos] = '\n'; fwrite(lowbuf,sizeof *lowbuf,pos+1,fp); } #ifndef OUTSIDE_OF_GT if (eoplist->pol_info != NULL) { GtUword suffix_positive; GtWord suffix_positive_sum = 0; bool startpolished = false, endpolished = false; for (suffix_positive = 0; suffix_positive < suffix_bits_used; suffix_positive++) { suffix_positive_sum += ((suffix_bits & set_mask) ? eoplist->pol_info->match_score : -eoplist->pol_info->difference_score); if (suffix_positive_sum < 0) { break; } set_mask >>= 1; } gt_assert(prefix_positive <= alignmentlength); if (prefix_positive >= pol_size || prefix_positive == alignmentlength || firstseedcolumn < pol_size) { startpolished = true; } if (suffix_positive >= pol_size || suffix_positive == alignmentlength || (lastseedcolumn != GT_UWORD_MAX && lastseedcolumn + pol_size > alignmentlength)) { endpolished = true; } fprintf(fp, "# polishing(m=" GT_WD ",d=" GT_WD ",p=" GT_WU "): " GT_WU "/" GT_WU, eoplist->pol_info->match_score, -eoplist->pol_info->difference_score, pol_size, prefix_positive, suffix_positive); if (firstseedcolumn < pol_size) { fprintf(fp, ", seed_on_start"); } if (lastseedcolumn + pol_size > alignmentlength) { fprintf(fp, ", seed_on_end"); } if (eoplist->withpolcheck) { fprintf(fp, "\n"); gt_assert(startpolished); gt_assert(endpolished); } else { if (!startpolished) { fprintf(fp, ", start not polished"); } if (!endpolished) { fprintf(fp, ", end not polished"); } fprintf(fp, "\n"); } }