extern "C" void bwa_seed2genome_pos(uint64_t sa_pos, uint64_t *contig_id, uint64_t *contig_pos, bwa_seq_t *seq) { bwa_seq_t *p=seq ; p->sa = sa_pos ; p->c1 = 1 ; p->type=BWA_TYPE_UNIQUE ; p->cigar=NULL ; p->strand=0 ; mybwa_cal_pac_pos_core(bwt_bwt[0], bwt_bwt[1], p, 0, 0); uint64_t len = pos_end(p) - p->pos; int seq_id=-1 ; bns_coor_pac2real(bwt_bns, p->pos, len, &seq_id) ; uint64_t pos = (int)(p->pos - bwt_bns->anns[seq_id].offset) ; if (false && sa_pos==461542) { fprintf(stdout, "seq_id=%i, pos=%lu, n_aln=%i, multi=%i, strand=%i\n", seq_id, pos, p->n_aln, p->n_multi, p->strand) ; p->sa = 461542;//461970 ; p->c1 = 1 ; p->type=BWA_TYPE_UNIQUE ; p->cigar=NULL ; p->strand=1 ; mybwa_cal_pac_pos_core(bwt_bwt[0], bwt_bwt[1], p, 0, 0); uint64_t len = pos_end(p) - p->pos; int seq_id=-1 ; bns_coor_pac2real(bwt_bns, p->pos, len, &seq_id) ; uint64_t pos = (int)(p->pos - bwt_bns->anns[seq_id].offset) ; fprintf(stdout, "+++ seq_id=%i, pos=%lu, n_aln=%i, multi=%i, strand=%i\n", seq_id, pos, p->n_aln, p->n_multi, p->strand) ; //fprintf(stdout, "bwt->seq_len=%lld", (long long int)bwt_bwt[0]->seq_len) ; //fprintf(stdout, "reverse_bwt->seq_len=%lld", (long long int)bwt_bwt[1]->seq_len) ; //bwa_seq_t *a=NULL ; //fprintf(stdout, "error%lld", (long long int)a->sa) ; } *contig_id=seq_id ; *contig_pos=pos ; }
/**
 * Finish the alignment of `sequence` and copy the result into an Alignment.
 *
 * Runs bwa_cal_pac_pos_core and bwa_refine_gapped on `sequence`, then copies
 * the counts, position, strand, mapping quality, CIGAR and MD string.
 *
 * @param sequence  record to finalise. Its `md` buffer is released and the
 *                  pointer is reset to NULL before returning.
 * @return an Alignment holding its own copies of the CIGAR (allocated with
 *         new[]) and of the MD string (allocated with strdup). Either pointer
 *         is NULL when the corresponding field of `sequence` is NULL.
 */
Alignment BWA::generate_final_alignment_from_sequence(bwa_seq_t* sequence) {
    // Calculate the local coordinate and local alignment.
    bwa_cal_pac_pos_core(bwts[0], bwts[1], sequence, options.max_diff, options.fnr);
    bwa_refine_gapped(bns, 1, sequence, reference, NULL);

    // Copy the local alignment data into the alignment object.
    Alignment alignment;

    // Populate basic path info.
    alignment.edit_distance = sequence->nm;
    alignment.num_mismatches = sequence->n_mm;
    alignment.num_gap_opens = sequence->n_gapo;
    alignment.num_gap_extensions = sequence->n_gape;
    alignment.num_best = sequence->c1;
    alignment.num_second_best = sequence->c2;

    // Final alignment position: contig index plus the position relative to
    // that contig's offset, shifted by one.
    alignment.type = sequence->type;
    bns_coor_pac2real(bns, sequence->pos, pos_end(sequence) - sequence->pos, &alignment.contig);
    alignment.pos = sequence->pos - bns->anns[alignment.contig].offset + 1;
    alignment.negative_strand = sequence->strand;
    alignment.mapping_quality = sequence->mapQ;

    // Cigar step: deep-copy so the result does not alias the sequence record.
    alignment.cigar = NULL;
    if (sequence->cigar) {
        alignment.cigar = new uint16_t[sequence->n_cigar];
        memcpy(alignment.cigar, sequence->cigar, sequence->n_cigar * sizeof(uint16_t));
    }
    alignment.n_cigar = sequence->n_cigar;

    // MD tag with a better breakdown of differences in the cigar.
    // strdup() must not be handed a null pointer, so a missing MD string is
    // propagated as NULL, mirroring the CIGAR handling above.
    alignment.md = sequence->md ? strdup(sequence->md) : NULL;
    // NOTE(review): md is released with delete[] as before; confirm that it is
    // allocated with new[] by whichever routine fills it in.
    delete[] sequence->md;
    sequence->md = NULL;

    return alignment;
}
/*
 * fix_cigar: trim an alignment whose CIGAR runs past the end of the reference
 * sequence in which it starts.
 *
 * Parameters
 *   qname    not referenced in the part of the body visible here.
 *   bns      reference annotations; anns[seqid].offset and anns[seqid].len of
 *            the sequence found by bns_coor_pac2real(bns, p->k, p->len, ...)
 *            are used.
 *   p        the hit; p->k and p->len are read and may be rewritten.
 *   n_cigar  number of entries in cigar.
 *   cigar    entries packed as (length << 4 | op); rewritten in place.
 *
 * Op codes as handled below (labels taken from the inline comments): 0 is a
 * match, 2 a deletion, and 1/4/5 are insertion or clipping.
 *
 * Steps
 *   1. Walk the CIGAR once. x starts at the hit's coordinate inside its
 *      reference sequence and advances on ops that consume reference; y
 *      advances on ops that consume query. lq is the final y.
 *   2. If x ends beyond refl, rebuild the CIGAR into a scratch array cn of
 *      n_cigar + 3 entries. At the first match or deletion that reaches refl
 *      the CIGAR is cut in two: the first part is closed with a clip (op 4)
 *      covering the rest of the query, and the second part is opened with a
 *      clip covering the query consumed so far. nc is the number of entries
 *      in the first part; mq[0] and mq[1] add up the match lengths on each
 *      side; nlen[0] and nlen[1] are the new p->len values; kk is the new
 *      p->k for the second part.
 *   3. Keep the part with more matched bases (the second part on a tie): copy
 *      it back into cigar, update n_cigar and p->len, and for the second part
 *      also p->k.
 *
 * NOTE(review): the calloc result is used without a NULL check.
 * NOTE(review): in the rebuild loop, ops other than 0, 1, 2, 4 and 5 are
 *   neither copied nor counted, although the first loop advances both x and y
 *   for them - confirm such ops cannot occur here.
 * NOTE(review): the part copied back can hold n_cigar + 1 entries (for
 *   example when the cut falls inside the first or last op) - confirm the
 *   caller's cigar buffer has room for that.
 * NOTE(review): as it appears here the definition stops after free(cn) and
 *   the closing brace of the `if (x > refl)` block, with no return statement
 *   and no closing brace for the function, although it is declared to return
 *   int. The tail looks truncated; restore it from the upstream source.
 */
static int fix_cigar(const char *qname, const bntseq_t *bns, bsw2hit_t *p, int n_cigar, uint32_t *cigar) { // FIXME: this routine does not work if the query bridge three reference sequences int32_t coor, refl, lq; int x, y, i, seqid; bns_coor_pac2real(bns, p->k, p->len, &seqid); coor = p->k - bns->anns[seqid].offset; refl = bns->anns[seqid].len; x = coor; y = 0; // test if the alignment goes beyond the boundary for (i = 0; i < n_cigar; ++i) { int op = cigar[i]&0xf, ln = cigar[i]>>4; if (op == 1 || op == 4 || op == 5) y += ln; else if (op == 2) x += ln; else x += ln, y += ln; } lq = y; // length of the query sequence if (x > refl) { // then fix it int j, nc, mq[2], nlen[2]; uint32_t *cn, kk = 0; nc = mq[0] = mq[1] = nlen[0] = nlen[1] = 0; cn = calloc(n_cigar + 3, 4); x = coor; y = 0; for (i = j = 0; i < n_cigar; ++i) { int op = cigar[i]&0xf, ln = cigar[i]>>4; if (op == 4 || op == 5 || op == 1) { // ins or clipping y += ln; cn[j++] = cigar[i]; } else if (op == 2) { // del if (x + ln >= refl && nc == 0) { cn[j++] = (uint32_t)(lq - y)<<4 | 4; nc = j; cn[j++] = (uint32_t)y<<4 | 4; kk = p->k + (x + ln - refl); nlen[0] = x - coor; nlen[1] = p->len - nlen[0] - ln; } else cn[j++] = cigar[i]; x += ln; } else if (op == 0) { // match if (x + ln >= refl && nc == 0) { // FIXME: not consider a special case where a split right between M and I cn[j++] = (uint32_t)(refl - x)<<4 | 0; // write M cn[j++] = (uint32_t)(lq - y - (refl - x))<<4 | 4; // write S nc = j; mq[0] += refl - x; cn[j++] = (uint32_t)(y + (refl - x))<<4 | 4; if (x + ln - refl) cn[j++] = (uint32_t)(x + ln - refl)<<4 | 0; mq[1] += x + ln - refl; kk = bns->anns[seqid].offset + refl; nlen[0] = refl - coor; nlen[1] = p->len - nlen[0]; } else { cn[j++] = cigar[i]; mq[nc?1:0] += ln; } x += ln; y += ln; } } if (mq[0] > mq[1]) { // then take the first alignment n_cigar = nc; memcpy(cigar, cn, 4 * nc); p->len = nlen[0]; } else { p->k = kk; p->len = nlen[1]; n_cigar = j - nc; memcpy(cigar, cn + nc, 4 * (j - nc)); } free(cn); }
void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2, const char *bwa_rg_id) { int j; //if (strcmp (p->name, "HWUSI-EAS1600:WT2_250_read_1:11_30_09:3:1:83:1066#0") == 0) //{ // fprintf (stderr, "found %s\n", p->name); //} if (p->type != BWA_TYPE_NO_MATCH || (mate && mate->type != BWA_TYPE_NO_MATCH)) { int seqid, nn, am = 0, flag = p->extra_flag; char XT; if (p->type == BWA_TYPE_NO_MATCH) { p->pos = mate->pos; p->strand = mate->strand; flag |= SAM_FSU; j = 1; } else j = pos_end(p) - p->pos; // j is the length of the reference in the alignment // get seqid nn = bns_coor_pac2real(bns, p->pos, j, &seqid); if (p->type != BWA_TYPE_NO_MATCH && p->pos + j - bns->anns[seqid].offset > bns->anns[seqid].len) flag |= SAM_FSU; // flag UNMAP as this alignment bridges two adjacent reference sequences // update flag and print it if (p->strand) flag |= SAM_FSR; if (mate) { if (mate->type != BWA_TYPE_NO_MATCH) { if (mate->strand) flag |= SAM_FMR; } else flag |= SAM_FMU; } printf("%s\t%d\t%s\t", p->name, flag, bns->anns[seqid].name); printf("%d\t%d\t", (int)(p->pos - bns->anns[seqid].offset + 1), p->mapQ); // print CIGAR if (p->cigar) { for (j = 0; j != p->n_cigar; ++j) printf("%d%c", __cigar_len(p->cigar[j]), "MIDSN"[__cigar_op(p->cigar[j])]); } else if (p->type == BWA_TYPE_NO_MATCH) printf("*"); else printf("%dM", p->len); // print mate coordinate if (mate && mate->type != BWA_TYPE_NO_MATCH) { int m_seqid, m_is_N; long long isize; am = mate->seQ < p->seQ? mate->seQ : p->seQ; // smaller single-end mapping quality // redundant calculation here, but should not matter too much m_is_N = bns_coor_pac2real(bns, mate->pos, mate->len, &m_seqid); printf("\t%s\t", (seqid == m_seqid)? "=" : bns->anns[m_seqid].name); isize = (seqid == m_seqid)? 
pos_5(mate) - pos_5(p) : 0; if (p->type == BWA_TYPE_NO_MATCH) isize = 0; printf("%d\t%lld\t", (int)(mate->pos - bns->anns[m_seqid].offset + 1), isize); } else if (mate) printf("\t=\t%d\t0\t", (int)(p->pos - bns->anns[seqid].offset + 1)); else printf("\t*\t0\t0\t"); // print sequence and quality if (p->strand == 0) for (j = 0; j != p->full_len; ++j) putchar("ACGTN"[(int)p->seq[j]]); else for (j = 0; j != p->full_len; ++j) putchar("TGCAN"[p->seq[p->full_len - 1 - j]]); putchar('\t'); if (p->qual) { if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality printf("%s", p->qual); } else printf("*"); if (bwa_rg_id) printf("\tRG:Z:%s", bwa_rg_id); if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); if (p->type != BWA_TYPE_NO_MATCH) { int i; // calculate XT tag XT = "NURM"[p->type]; if (nn > 10) XT = 'N'; // print tags printf("\tXT:A:%c\t%s:i:%d", XT, (mode & BWA_MODE_COMPREAD)? "NM" : "CM", p->nm); // print XS tag, to be compatible with Cufflinks if (p->sense_strand != 2 ) printf("\tXS:A:%c", p->sense_strand ? '-':'+' ); else printf("\tXS:A:."); if (nn) printf("\tXN:i:%d", nn); if (mate) printf("\tSM:i:%lu\tAM:i:%d", p->seQ, am); if (p->type != BWA_TYPE_MATESW) { // X0 and X1 are not available for this type of alignment printf("\tX0:i:%lu", p->c1); if (p->c1 <= max_top2) printf("\tX1:i:%lu", p->c2); } printf("\tXM:i:%d\tXO:i:%d\tXG:i:%d", p->n_mm, p->n_gapo_t + p->n_gapo_q, p->n_gapo_t+p->n_gape_t+p->n_gapo_q+p->n_gape_q); if (p->md) printf("\tMD:Z:%s", p->md); // print multiple hits if (p->n_multi) { bool header_printed = 0; for (i = 0; i < p->n_multi; ++i) { bwt_multi1_t *q = p->multi + i; j = pos_end_multi(q, p->len) - q->pos; nn = bns_coor_pac2real(bns, q->pos, j, &seqid); if(pos_end_multi(q, p->len) - bns->anns[seqid].offset > bns->anns[seqid].len) continue; //the alignment bridges adjacent sequences (chroms) //TODO: need to avoid this at the first place in the junction discovery step, but this should be rare for mm or human if (! 
header_printed) { header_printed = 1; printf("\tXA:Z:"); } int k; printf("%s,%c%d,", bns->anns[seqid].name, q->strand? '-' : '+', (int)(q->pos - bns->anns[seqid].offset + 1)); if (q->cigar) { for (k = 0; k < q->n_cigar; ++k) printf("%d%c", __cigar_len(q->cigar[k]), "MIDSN"[__cigar_op(q->cigar[k])]); } else printf("%dM", p->len); printf(",%d", q->nm); //q->gap_t + q->gap_q + q->mm); if (q->sense_strand != 2) printf(",%c;", q->sense_strand? '-' : '+' ); else printf(",.;"); } } } putchar('\n'); } else { // this read has no match ubyte_t *s = p->strand? p->rseq : p->seq; int flag = p->extra_flag | SAM_FSU; if (mate && mate->type == BWA_TYPE_NO_MATCH) flag |= SAM_FMU; printf("%s\t%d\t*\t0\t0\t*\t*\t0\t0\t", p->name, flag); for (j = 0; j != p->len; ++j) putchar("ACGTN"[(int)s[j]]); putchar('\t'); if (p->qual) { if (p->strand) seq_reverse(p->len, p->qual, 0); // reverse quality printf("%s", p->qual); } else printf("*"); if (p->clip_len < p->full_len) printf("\tXC:i:%d", p->clip_len); putchar('\n'); } }