void bamUnpackAux(const bam1_t *bam, struct dyString *dy) /* Unpack the tag:type:val part of bam into dy */ { // adapted from part of bam.c bam_format1: uint8_t *s = bam1_aux(bam); boolean firstTime = TRUE; while (s < bam->data + bam->data_len) { if (firstTime) firstTime = FALSE; else dyStringAppendC(dy, '\t'); dyStringAppendC(dy, *s++); dyStringAppendC(dy, *s++); dyStringAppendC(dy, ':'); dyStringAppendC(dy, s[0]); dyStringAppendC(dy, ':'); uint8_t type = *s++; if (type == 'A') { dyStringPrintf(dy, "%c", *s); ++s; } else if (type == 'C') { dyStringPrintf(dy, "%u", *s); ++s; } else if (type == 'c') { dyStringPrintf(dy, "%d", *s); ++s; } else if (type == 'S') { dyStringPrintf(dy, "%u", *(uint16_t*)s); s += 2; } else if (type == 's') { dyStringPrintf(dy, "%d", *(int16_t*)s); s += 2; } else if (type == 'I') { dyStringPrintf(dy, "%u", *(uint32_t*)s); s += 4; } else if (type == 'i') { dyStringPrintf(dy, "%d", *(int32_t*)s); s += 4; } else if (type == 'f') { dyStringPrintf(dy, "%g", *(float*)s); s += 4; } else if (type == 'd') { dyStringPrintf(dy, "%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { dyStringAppend(dy, (char *)s); s += strlen((char *)s) + 1; } } }
void bamShowTags(const bam1_t *bam) /* Print out tags in HTML: bold key, no type indicator for brevity. */ { // adapted from part of bam.c bam_format1: uint8_t *s = bam1_aux(bam); while (s < bam->data + bam->data_len) { uint8_t type, key[2]; key[0] = s[0]; key[1] = s[1]; s += 2; type = *s; ++s; printf(" <B>%c%c</B>:", key[0], key[1]); if (type == 'A') { printf("%c", *s); ++s; } else if (type == 'C') { printf("%u", *s); ++s; } else if (type == 'c') { printf("%d", *s); ++s; } else if (type == 'S') { printf("%u", *(uint16_t*)s); s += 2; } else if (type == 's') { printf("%d", *(int16_t*)s); s += 2; } else if (type == 'I') { printf("%u", *(uint32_t*)s); s += 4; } else if (type == 'i') { printf("%d", *(int32_t*)s); s += 4; } else if (type == 'f') { printf("%g", *(float*)s); s += 4; } else if (type == 'd') { printf("%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { htmTextOut(stdout, (char *)s); s += strlen((char *)s) + 1; } } putc('\n', stdout); }
/*
 * Dump one BAM record to stdout for debugging: the variable-length data
 * (query name, sequence, qualities shifted by base_quality, CIGAR, raw aux
 * bytes), the length fields, every core field, and the individual flag bits
 * as 0/1.
 *
 * NOTE(review): the strings returned by convert_to_sequence_string() and
 * convert_to_cigar_string() are printed and never released here -- confirm
 * who owns them.
 * NOTE(review): the line labelled "primary_alignment" reports the
 * BAM_FSECONDARY bit -- confirm the label is intended.
 */
void bam_print(bam1_t* bam_p, int base_quality) {
    printf("\n------------------------------------------------------------------->\n");
    printf("bam_p->data (qname): %s\n", bam1_qname(bam_p));

    char* sequence_text = convert_to_sequence_string(bam1_seq(bam_p), bam_p->core.l_qseq);
    printf("bam_p->data (seq): %s\n", sequence_text);

    //quality: one character per base, offset by base_quality
    printf("bam_p->data (qual): ");
    char* qual_bytes = (char*) bam1_qual(bam_p);
    int base_idx = 0;
    while (base_idx < bam_p->core.l_qseq) {
        putchar(qual_bytes[base_idx] + base_quality);
        base_idx++;
    }
    printf("\n");

    char* cigar_text = convert_to_cigar_string(bam1_cigar(bam_p), bam_p->core.n_cigar);
    printf("bam_p->data (cigar): %s\n", cigar_text);

    //aux (optional) data, written byte by byte exactly as stored
    printf("bam_p->data (aux): ");
    char* aux_bytes = (char*) bam1_aux(bam_p);
    int aux_idx = 0;
    while (aux_idx < bam_p->l_aux) {
        putchar(aux_bytes[aux_idx]);
        aux_idx++;
    }
    printf("\n");

    //lengths
    printf("bam_p->l_aux: %i\n", bam_p->l_aux);
    printf("bam_p->data_len: %i\n", bam_p->data_len);
    printf("bam_p->m_data: %i\n", bam_p->m_data);

    //core
    printf("bam_p->core.tid: %i\n", bam_p->core.tid);
    printf("bam_p->core.pos: %i\n", bam_p->core.pos);
    printf("bam_p->core.bin: %u\n", bam_p->core.bin);
    printf("bam_p->core.qual: %u\n", bam_p->core.qual);
    printf("bam_p->core.l_qname: %u\n", bam_p->core.l_qname);
    printf("bam_p->core.flag (16 bits): %u\n", bam_p->core.flag);
    printf("bam_p->core.n_cigar: %u\n", bam_p->core.n_cigar);
    printf("bam_p->core.l_qseq: %i\n", bam_p->core.l_qseq);
    printf("bam_p->core.mtid: %i\n", bam_p->core.mtid);
    printf("bam_p->core.mpos: %i\n", bam_p->core.mpos);
    printf("bam_p->core.isize: %i\n", bam_p->core.isize);

    //flag bits, each reduced to 0 or 1
    printf("\nbam1_t.core flags\n");
    printf("-----------------------\n");
    printf("flag (is_paired_end): %i\n", (bam_p->core.flag & BAM_FPAIRED) != 0);
    printf("flag (is_paired_end_mapped): %i\n", (bam_p->core.flag & BAM_FPROPER_PAIR) != 0);
    printf("flag (is_seq_unmapped): %i\n", (bam_p->core.flag & BAM_FUNMAP) != 0);
    printf("flag (is_mate_unmapped): %i\n", (bam_p->core.flag & BAM_FMUNMAP) != 0);
    printf("flag (seq_strand): %i\n", (bam_p->core.flag & BAM_FREVERSE) != 0);
    printf("flag (mate_strand): %i\n", (bam_p->core.flag & BAM_FMREVERSE) != 0);
    printf("flag (pair_num_1): %i\n", (bam_p->core.flag & BAM_FREAD1) != 0);
    printf("flag (pair_num_2): %i\n", (bam_p->core.flag & BAM_FREAD2) != 0);
    printf("flag (primary_alignment): %i\n", (bam_p->core.flag & BAM_FSECONDARY) != 0);
    printf("flag (fails_quality_check): %i\n", (bam_p->core.flag & BAM_FQCFAIL) != 0);
    printf("flag (pc_optical_duplicate): %i\n", (bam_p->core.flag & BAM_FDUP) != 0);
}
/*
 * Build a newly allocated alignment_t from a BAM record.
 *
 * Copies the numeric core fields, the query name, the decoded sequence, the
 * quality string (converted with base_quality), the CIGAR string and the raw
 * optional (aux) bytes, then expands the flag word into individual fields.
 *
 * Returns the new alignment, or NULL if any allocation fails (everything
 * allocated so far is released first).
 *
 * The CIGAR buffer is sized from the actual length of the converted string
 * (never smaller than the historical estimate of 4 bytes per operation), so
 * operations with long lengths cannot overflow it.
 *
 * NOTE(review): the strings returned by convert_to_sequence_string() and
 * convert_to_cigar_string() are copied and not released here, exactly as
 * before -- confirm who owns them.
 */
alignment_t* alignment_new_by_bam(bam1_t* bam_p, int base_quality) {
    char* cigar_string = NULL;
    size_t cigar_size = 0;
    size_t cigar_needed = 0;
    uint32_t flag = 0;

    //memory allocation for the structure (zeroed, so unset pointers are NULL)
    alignment_t* alignment_p = (alignment_t*) calloc(1, sizeof(alignment_t));
    if (alignment_p == NULL) {
        return NULL;
    }

    //numeric data
    alignment_p->num_cigar_operations = (int) bam_p->core.n_cigar;
    alignment_p->chromosome = bam_p->core.tid;
    alignment_p->position = bam_p->core.pos;
    alignment_p->mate_chromosome = bam_p->core.mtid;
    alignment_p->mate_position = bam_p->core.mpos;
    alignment_p->map_quality = bam_p->core.qual;
    alignment_p->template_length = bam_p->core.isize;

    //memory allocation for inner fields according to indicated sizes
    alignment_p->query_name = (char*) calloc(bam_p->core.l_qname, sizeof(char));
    alignment_p->sequence = (char*) calloc(bam_p->core.l_qseq + 1, sizeof(char));
    alignment_p->quality = (char*) calloc(bam_p->core.l_qseq + 1, sizeof(char));   //same length as sequence
    alignment_p->optional_fields = (uint8_t*) calloc(bam_p->l_aux, sizeof(uint8_t));
    alignment_p->optional_fields_length = bam_p->l_aux;

    //calloc(0, ...) may legitimately return NULL, so only a non-empty aux block counts as a failure
    if (alignment_p->query_name == NULL || alignment_p->sequence == NULL || alignment_p->quality == NULL
            || (bam_p->l_aux > 0 && alignment_p->optional_fields == NULL)) {
        goto fail;
    }

    //copy the data between structures (helper calls kept in their original order)
    strcpy(alignment_p->query_name, bam1_qname(bam_p));
    strcpy(alignment_p->sequence, convert_to_sequence_string(bam1_seq(bam_p), bam_p->core.l_qseq));
    convert_to_quality_string_length(alignment_p->quality, bam1_qual(bam_p), bam_p->core.l_qseq, base_quality);

    //size the cigar buffer from the real string: 4 bytes per operation is only an estimate
    cigar_string = convert_to_cigar_string(bam1_cigar(bam_p), alignment_p->num_cigar_operations);
    cigar_size = (size_t) max(MIN_ALLOCATED_SIZE_FOR_CIGAR_STRING, alignment_p->num_cigar_operations << 2);
    cigar_needed = strlen(cigar_string) + 1;
    if (cigar_size < cigar_needed) {
        cigar_size = cigar_needed;
    }
    alignment_p->cigar = (char*) calloc(cigar_size, sizeof(char));
    if (alignment_p->cigar == NULL) {
        goto fail;
    }
    strcpy(alignment_p->cigar, cigar_string);

    if (bam_p->l_aux > 0) {
        memcpy(alignment_p->optional_fields, bam1_aux(bam_p), bam_p->l_aux);
    }

    //flags
    flag = (uint32_t) bam_p->core.flag;
    alignment_p->is_paired_end = (flag & BAM_FPAIRED) ? 1 : 0;
    alignment_p->is_paired_end_mapped = (flag & BAM_FPROPER_PAIR) ? 1 : 0;
    alignment_p->is_seq_mapped = (flag & BAM_FUNMAP) ? 0 : 1;   //in bam structure is negative flag!!!
    alignment_p->is_mate_mapped = (flag & BAM_FMUNMAP) ? 0 : 1;   //in bam structure is negative flag!!!
    alignment_p->seq_strand = (flag & BAM_FREVERSE) ? 1 : 0;
    alignment_p->mate_strand = (flag & BAM_FMREVERSE) ? 1 : 0;

    if (flag & BAM_FREAD1) {
        alignment_p->pair_num = 1;
    } else if (flag & BAM_FREAD2) {
        alignment_p->pair_num = 2;
    } else {
        alignment_p->pair_num = 0;
    }

    //NOTE(review): primary_alignment mirrors the BAM_FSECONDARY bit as-is; confirm the naming is intended
    alignment_p->primary_alignment = (flag & BAM_FSECONDARY) ? 1 : 0;
    alignment_p->fails_quality_check = (flag & BAM_FQCFAIL) ? 1 : 0;
    alignment_p->pc_optical_duplicate = (flag & BAM_FDUP) ? 1 : 0;

    return alignment_p;

fail:
    //free(NULL) is a no-op, so fields that were never allocated are safe here
    free(alignment_p->query_name);
    free(alignment_p->sequence);
    free(alignment_p->quality);
    free(alignment_p->cigar);
    free(alignment_p->optional_fields);
    free(alignment_p);
    return NULL;
}
char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of) { uint8_t *s = bam1_seq(b), *t = bam1_qual(b); int i; const bam1_core_t *c = &b->core; kstring_t str; str.l = str.m = 0; str.s = 0; ksprintf(&str, "%s\t", bam1_qname(b)); if (of == BAM_OFDEC) ksprintf(&str, "%d\t", c->flag); else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag); else { // BAM_OFSTR for (i = 0; i < 16; ++i) if ((c->flag & 1<<i) && bam_flag2char_table[i]) kputc(bam_flag2char_table[i], &str); kputc('\t', &str); } if (c->tid < 0) kputs("*\t", &str); else ksprintf(&str, "%s\t", header->target_name[c->tid]); ksprintf(&str, "%d\t%d\t", c->pos + 1, c->qual); if (c->n_cigar == 0) kputc('*', &str); else { for (i = 0; i < c->n_cigar; ++i) ksprintf(&str, "%d%c", bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, "MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK]); } kputc('\t', &str); if (c->mtid < 0) kputs("*\t", &str); else if (c->mtid == c->tid) kputs("=\t", &str); else ksprintf(&str, "%s\t", header->target_name[c->mtid]); ksprintf(&str, "%d\t%d\t", c->mpos + 1, c->isize); if (c->l_qseq) { for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str); kputc('\t', &str); if (t[0] == 0xff) kputc('*', &str); else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str); } else ksprintf(&str, "*\t*"); s = bam1_aux(b); while (s < b->data + b->data_len) { uint8_t type, key[2]; key[0] = s[0]; key[1] = s[1]; s += 2; type = *s; ++s; ksprintf(&str, "\t%c%c:", key[0], key[1]); if (type == 'A') { ksprintf(&str, "A:%c", *s); ++s; } else if (type == 'C') { ksprintf(&str, "i:%u", *s); ++s; } else if (type == 'c') { ksprintf(&str, "i:%d", *(int8_t*)s); ++s; } else if (type == 'S') { ksprintf(&str, "i:%u", *(uint16_t*)s); s += 2; } else if (type == 's') { ksprintf(&str, "i:%d", *(int16_t*)s); s += 2; } else if (type == 'I') { ksprintf(&str, "i:%u", *(uint32_t*)s); s += 4; } else if (type == 'i') { ksprintf(&str, "i:%d", *(int32_t*)s); s += 4; } else if (type == 'f') { ksprintf(&str, "f:%g", 
*(float*)s); s += 4; } else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { ksprintf(&str, "%c:", type); while (*s) kputc(*s++, &str); ++s; } } return str.s; }
/**
 * Return the flow signal tag as SAM text, "FZ:B:S,v1,v2,...", or an empty
 * string when the record has no FZ tag stored as an array of unsigned
 * 16-bit values.
 *
 * The aux block is walked tag by tag. Only the values of the FZ array are
 * collected; other B:S arrays that precede FZ are skipped rather than being
 * mixed into the result. If a tag with an unrecognized type or array subtype
 * is met, the size of its value is unknown, so the search stops and an empty
 * string is returned.
 */
std::string getFlowSignals() const {
    assert(m_dataPtr);
    const bam1_t *b = m_dataPtr.get();
    const uint8_t *s = bam1_aux(b);
    const uint8_t *end = b->data + b->data_len;

    while (s < end) {
        const bool isFlowTag = (s[0] == 'F' && s[1] == 'Z');
        const uint8_t type = s[2];
        s += 3; // 2-byte key + 1-byte type

        switch (type) {
        case 'A': case 'C': case 'c':
            s += 1;
            break;
        case 'S': case 's':
            s += 2;
            break;
        case 'I': case 'i': case 'f':
            s += 4;
            break;
        case 'd':
            s += 8;
            break;
        case 'Z': case 'H':
            while (*s) ++s;
            ++s; // step over the terminating NUL
            break;
        case 'B': {
            const uint8_t sub_type = *(s++);
            int32_t n;
            memcpy(&n, s, 4);
            s += 4; // now points to the start of the array

            if (isFlowTag && sub_type == 'S') {
                std::stringstream str;
                str << "FZ:B:S";
                for (int32_t i = 0; i < n; ++i) {
                    uint16_t value;
                    memcpy(&value, s, sizeof(value)); // aux data is packed; avoid a misaligned read
                    s += sizeof(value);
                    str << "," << value;
                }
                return str.str();
            }

            size_t elemSize = 0;
            switch (sub_type) {
            case 'c': case 'C': elemSize = 1; break;
            case 's': case 'S': elemSize = 2; break;
            case 'i': case 'I': case 'f': elemSize = 4; break;
            default: break;
            }
            if (elemSize == 0) return ""; // unknown subtype: cannot locate the next tag
            if (n > 0) s += static_cast<size_t>(n) * elemSize;
            break;
        }
        default:
            return ""; // unknown type: cannot locate the next tag
        }
    }
    return "";
}
/*
 * Drop every aux tag from record b except the one whose value s points to.
 *
 * s is expected to point just past a tag's 2-byte key (the code steps back
 * two bytes to find the tag start).  That tag is moved to the beginning of
 * the aux area and data_len is reduced so the record ends right after it.
 * If s is NULL, all aux data is dropped.  Always returns 0.
 *
 * NOTE(review): only data_len is adjusted here; if the record type also
 * carries a separate aux-length field, confirm it does not need updating.
 */
int bam_aux_drop_other(bam1_t *b, uint8_t *s)
{
    uint8_t *tag_start, *aux_start;

    if (!s) {
        /* nothing to keep: cut the whole aux area off the record */
        b->data_len -= bam_get_l_aux(b);
        return 0;
    }

    aux_start = bam1_aux(b);
    tag_start = s - 2;
    __skip_tag(s);  /* advances s past the kept tag's type byte and value */
    memmove(aux_start, tag_start, s - tag_start);
    b->data_len -= bam_get_l_aux(b) - (s - tag_start);
    return 0;
}
char *bamGetTagString(const bam1_t *bam, char *tag, char *buf, size_t bufSize) /* If bam's tags include the given 2-character tag, place the value into * buf (zero-terminated, trunc'd if nec) and return a pointer to buf, * or NULL if tag is not present. */ { if (tag == NULL) errAbort("NULL tag passed to bamGetTagString"); if (! (isalpha(tag[0]) && isalnum(tag[1]) && tag[2] == '\0')) errAbort("bamGetTagString: invalid tag '%s'", htmlEncode(tag)); char *val = NULL; // adapted from part of bam.c bam_format1: uint8_t *s = bam1_aux(bam); while (s < bam->data + bam->data_len) { uint8_t type, key[2]; key[0] = s[0]; key[1] = s[1]; s += 2; type = *s; ++s; if (key[0] == tag[0] && key[1] == tag[1]) { if (type == 'A') { snprintf(buf, bufSize, "%c", *s);} else if (type == 'C') { snprintf(buf, bufSize, "%u", *s); } else if (type == 'c') { snprintf(buf, bufSize, "%d", *s); } else if (type == 'S') { snprintf(buf, bufSize, "%u", *(uint16_t*)s); } else if (type == 's') { snprintf(buf, bufSize, "%d", *(int16_t*)s); } else if (type == 'I') { snprintf(buf, bufSize, "%u", *(uint32_t*)s); } else if (type == 'i') { snprintf(buf, bufSize, "%d", *(int32_t*)s); } else if (type == 'f') { snprintf(buf, bufSize, "%g", *(float*)s); } else if (type == 'd') { snprintf(buf, bufSize, "%lg", *(double*)s); } else if (type == 'Z' || type == 'H') strncpy(buf, (char *)s, bufSize); else buf[0] = '\0'; buf[bufSize-1] = '\0'; // TODO: is this nec?? see man pages val = buf; break; } else { if (type == 'A' || type == 'C' || type == 'c') { ++s; } else if (type == 'S' || type == 's') { s += 2; } else if (type == 'I' || type == 'i' || type == 'f') { s += 4; } else if (type == 'd') { s += 8; } else if (type == 'Z' || type == 'H') { while (*s++); } } } return val; }
char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of) { uint8_t *s = bam1_seq(b), *t = bam1_qual(b); int i; const bam1_core_t *c = &b->core; kstring_t str; str.l = str.m = 0; str.s = 0; kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str); if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); } else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag); else { // BAM_OFSTR for (i = 0; i < 16; ++i) if ((c->flag & 1<<i) && bam_flag2char_table[i]) kputc(bam_flag2char_table[i], &str); kputc('\t', &str); } if (c->tid < 0) kputsn("*\t", 2, &str); else { if (header) kputs(header->target_name[c->tid] , &str); else kputw(c->tid, &str); kputc('\t', &str); } kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str); if (c->n_cigar == 0) kputc('*', &str); else { for (i = 0; i < c->n_cigar; ++i) { kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str); kputc("MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str); } } kputc('\t', &str); if (c->mtid < 0) kputsn("*\t", 2, &str); else if (c->mtid == c->tid) kputsn("=\t", 2, &str); else { if (header) kputs(header->target_name[c->mtid], &str); else kputw(c->mtid, &str); kputc('\t', &str); } kputw(c->mpos + 1, &str); kputc('\t', &str); kputw(c->isize, &str); kputc('\t', &str); if (c->l_qseq) { for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str); kputc('\t', &str); if (t[0] == 0xff) kputc('*', &str); else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str); } else kputsn("*\t*", 3, &str); s = bam1_aux(b); while (s < b->data + b->data_len) { uint8_t type, key[2]; key[0] = s[0]; key[1] = s[1]; s += 2; type = *s; ++s; kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str); if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; } else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; } else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; } else if (type == 'S') { kputsn("i:", 2, &str); 
kputw(*(uint16_t*)s, &str); s += 2; } else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; } else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; } else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; } else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; } else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; } else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; } else if (type == 'B') { uint8_t sub_type = *(s++); int32_t n; memcpy(&n, s, 4); s += 4; // no point to the start of the array kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing for (i = 0; i < n; ++i) { kputc(',', &str); if ('c' == sub_type || 'c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; } else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; } else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; } else if ('S' == sub_type) { kputw(*(uint16_t*)s, &str); s += 2; } else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; } else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; } else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; } } } } return str.s; }
/*
 * Rebuild the CIGAR of record b in place from a pairwise alignment, then
 * refresh the MD tag.
 *
 * b     record whose data buffer is resized and rewritten.
 * ref   aligned reference characters; positions 0..len-1 are read.
 * read  aligned read characters; positions 0..len-1 are read.
 * len   number of alignment columns.
 *
 * Each column is classified with tmap_sam_get_type(ref[i], read[i]) and
 * consecutive columns of the same type are merged into one CIGAR operation.
 * A soft clip found at the first and/or last position of the existing CIGAR
 * is counted as an extra operation, and its slot is left out of the rewrite.
 * The function calls tmap_error() if data_len, l_aux and the aux position do
 * not agree, both before and after the resize.
 *
 * NOTE(review): cigar[0], ref[0] and read[0] are read unconditionally, so
 * this appears to assume b->core.n_cigar >= 1 and len >= 1 -- confirm
 * against callers.
 */
void tmap_sam_update_cigar_and_md(bam1_t *b, char *ref, char *read, int32_t len)
{
  int32_t i, n_cigar, last_type;
  uint32_t *cigar;
  int32_t diff;
  int32_t soft_clip_start_i, soft_clip_end_i;

  // sanity check: the aux area must end exactly at the end of the data
  if(b->data_len - b->l_aux != bam1_aux(b) - b->data) {
      tmap_error("b->data_len - b->l_aux != bam1_aux(b) - b->data", Exit, OutOfRange);
  }

  // keep track of soft clipping
  n_cigar = soft_clip_start_i = soft_clip_end_i = 0;
  cigar = bam1_cigar(b);
  if(BAM_CSOFT_CLIP == TMAP_SW_CIGAR_OP(cigar[0])) {
      soft_clip_start_i = 1;
      n_cigar++;
  }
  // the end clip is only considered when there is more than one operation,
  // so a single soft-clip operation is not counted twice
  if(1 < b->core.n_cigar && BAM_CSOFT_CLIP == TMAP_SW_CIGAR_OP(cigar[b->core.n_cigar-1])) {
      soft_clip_end_i = 1;
      n_cigar++;
  }
  cigar = NULL; // the buffer may be reallocated below; re-fetched before use

  // get the # of cigar operators: one for the first column, plus one for
  // every change of type between neighbouring columns
  last_type = tmap_sam_get_type(ref[0], read[0]);
  n_cigar++;
  for(i=1;i<len;i++) {
      int32_t cur_type = tmap_sam_get_type(ref[i], read[i]);
      if(cur_type != last_type) {
          n_cigar++;
      }
      last_type = cur_type;
  }

  // resize the data field if necessary
  if(n_cigar < b->core.n_cigar) {
      // fewer operations than before: diff is the number of bytes to remove
      diff = sizeof(uint32_t) * (b->core.n_cigar - n_cigar);
      // shift down
      // NOTE(review): the copy starts at data[l_qname]; if the CIGAR array
      // begins there, a leading soft-clip word is overwritten here and the
      // rebuild loop below skips index 0 when soft_clip_start_i is set --
      // confirm the leading soft clip is preserved in this branch.
      for(i=b->core.l_qname;i<b->data_len - diff;i++) {
          b->data[i] = b->data[i + diff];
      }
      b->data_len -= diff;
      b->core.n_cigar = n_cigar;
  }
  else if(b->core.n_cigar < n_cigar) {
      // more operations than before: diff is the number of bytes to insert
      diff = sizeof(uint32_t) * (n_cigar - b->core.n_cigar);
      // realloc
      if(b->m_data <= (b->data_len + diff)) {
          b->m_data = b->data_len + diff + 1;
          tmap_roundup32(b->m_data);
          b->data = tmap_realloc(b->data, sizeof(uint8_t) * b->m_data, "b->data");
      }
      // shift up (from the last byte backwards so no byte is overwritten
      // before it has been copied)
      for(i=b->data_len-1;b->core.l_qname<=i;i--) {
          b->data[i + diff] = b->data[i];
      }
      b->data_len += diff;
      b->core.n_cigar = n_cigar;
  }
  // same sanity check as on entry, now against the resized buffer
  if(b->data_len - b->l_aux != bam1_aux(b) - b->data) {
      tmap_error("b->data_len - b->l_aux != bam1_aux(b) - b->data", Exit, OutOfRange);
  }

  // create the cigar: clear every slot except the soft-clip slots
  cigar = bam1_cigar(b);
  for(i=soft_clip_start_i;i<n_cigar-soft_clip_end_i;i++) {
      cigar[i] = 0;
  }
  n_cigar = soft_clip_start_i; // skip over soft clipping etc.
  // from here on n_cigar is the index of the operation being built
  last_type = tmap_sam_get_type(ref[0], read[0]);
  TMAP_SW_CIGAR_STORE(cigar[n_cigar], last_type, 1);
  for(i=1;i<len;i++) {
      int32_t cur_type = tmap_sam_get_type(ref[i], read[i]);
      if(cur_type == last_type) {
          // add to the cigar length
          TMAP_SW_CIGAR_ADD_LENGTH(cigar[n_cigar], 1);
      }
      else {
          // add to the cigar
          n_cigar++;
          TMAP_SW_CIGAR_STORE(cigar[n_cigar], cur_type, 1);
      }
      last_type = cur_type;
  }

  // Note: the md tag must be updated
  tmap_sam_md1(b, ref, len);
}