// FIXME: we should also check the LB tag associated with each alignment const char *bam_get_library(bam_header_t *h, const bam1_t *b) { const uint8_t *rg; if (h->dict == 0) h->dict = sam_header_parse2(h->text); if (h->rg2lib == 0) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB"); rg = bam_aux_get(b, "RG"); return (rg == 0)? 0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1)); }
bam_header_t *bam_header_dup(const bam_header_t *h0) { bam_header_t *h; int i; h = bam_header_init(); *h = *h0; h->hash = h->rg2lib = 0; h->header = 0; h->text = (char*)calloc(h->l_text + 1, 1); memcpy(h->text, h0->text, h->l_text); h->target_len = (uint32_t*)calloc(h->n_targets, 4); h->target_name = (char**)calloc(h->n_targets, sizeof(void*)); for (i = 0; i < h->n_targets; ++i) { h->target_len[i] = h0->target_len[i]; h->target_name[i] = strdup(h0->target_name[i]); } if (h->header == 0) h->header = sam_header_parse2(h->text); return h; }
samfile_t* b2g_samfile_open(char* path, char* mode, void* aux, ...) { char filename[1024] = {}; va_list args; va_start(args, aux); vsprintf(filename, path, args); va_end(args); if ('r' == mode[0] && access(filename, R_OK)) return NULL; samfile_t *in = samopen(filename, mode, aux); if (in) { if (!in->header) return NULL; // Make sure to parse the header for later use. bam_header_t *header = in->header; if (header->dict == 0) header->dict = sam_header_parse2(header->text); if (header->rg2lib == 0) header->rg2lib = sam_header2tbl(header->dict, "RG", "ID", "LB"); } return in; }
/**
 * Build the output BAM header.
 *
 * Steps, in order:
 *  - @HD: for SAM/BAM input (exactly one input file is required) the input's
 *    SAM header is cloned, or parsed from its text when not yet parsed;
 *    otherwise a new header with "@HD VN:1.4" is created.
 *  - A @PG ID is chosen: PACKAGE_NAME, or PACKAGE_NAME.<j> for the first j
 *    whose ID is not already present; the last ID found in use is kept as the
 *    previous-program ID (@PG.PP).
 *  - @SQ: when `refseq` is non-NULL, all existing @SQ records are removed and
 *    one record (SN/LN) is added per reference annotation.
 *  - @RG: taken from the command line (`rg_sam`), generated as dummies
 *    ("NOID", "NOSM") for non-SAM/BAM input, or validated/completed in place
 *    for SAM/BAM input.
 *  - @PG: ID, VN, CL (argv joined with single spaces) and, if any, PP.
 *
 * @param refseq      reference sequence data, or NULL to leave @SQ untouched
 * @param io_in       the input reads; its type and file count drive @HD/@RG
 * @param rg_sam      read-group fields from the command line, each "TG:value";
 *                    an "ID:" entry starts a new read group
 * @param rg_sam_num  number of entries in `rg_sam`
 * @param argc        number of entries in `argv`
 * @param argv        command-line arguments recorded in @PG.CL
 * @return            the value returned by sam_header_to_bam_header() for a
 *                    BAM header wrapping the SAM header built here
 */
bam_header_t *
tmap_seqs_io_to_bam_header(tmap_refseq_t *refseq,
                           tmap_seqs_io_t *io_in,
                           char **rg_sam, int32_t rg_sam_num,
                           int32_t argc, char *argv[])
{
  bam_header_t *bam_header = NULL;
  sam_header_t *header = NULL; // the output header
  sam_header_record_t *record = NULL;
  sam_header_record_t **record_list = NULL;
  // Two tag characters plus a terminator: the tag is passed as a key string,
  // exactly like the string literals used for the other keys below.
  char tag[3] = {'\0', '\0', '\0'};
  char *command_line = NULL;
  char *id = NULL;
  char *id_pp = NULL;
  int32_t i, j;

  // @HD
  if(io_in->type == TMAP_SEQ_TYPE_SAM || io_in->type == TMAP_SEQ_TYPE_BAM) {
      // should be only one input file
      if(1 != io_in->n) {
          tmap_bug();
      }
      // get the current header
      if(NULL == io_in->seqios[0]) tmap_bug();
      if(NULL == io_in->seqios[0]->io.samio) tmap_bug();
      if(NULL == io_in->seqios[0]->io.samio->fp->header) tmap_bug();
      if(NULL == io_in->seqios[0]->io.samio->fp->header->header) {
          // not parsed yet: parse the raw header text
          header = sam_header_parse2(io_in->seqios[0]->io.samio->fp->header->text);
      }
      else {
          header = io_in->seqios[0]->io.samio->fp->header->header;
          if(NULL == header) tmap_bug();
          header = sam_header_clone(header); // clone so the input's header is not modified
      }
      if(NULL == header) tmap_bug();
  }
  else {
      // empty header
      header = sam_header_init();
      // @HD - header line
      record = sam_header_record_init("HD"); // new header line
      if(0 == sam_header_record_add(record, "VN", "1.4")) tmap_bug(); // version number
      if(0 == sam_header_add_record(header, record)) tmap_bug(); // add the header line
      // nullify
      record = NULL;
  }

  // Get the TMAP program ID
  id = tmap_malloc(sizeof(char) * (1 + strlen(PACKAGE_NAME)), "id");
  strcpy(id, PACKAGE_NAME); // default
  for(i=j=0;NULL != (record_list = sam_header_get_record(header, "PG", "ID", id, &i)) && 0 < i;i=0) { // while the id is found
      char *ptr = NULL;
      // swap id and id_pp: the ID just found in use becomes the previous-program ID
      ptr = id_pp;
      id_pp = id;
      id = ptr;
      // create the new ID: digits of j + '.' + PACKAGE_NAME + terminator
      j++;
      id = tmap_realloc(id, sizeof(char) * (1 + (int)log10(j) + 1 + strlen(PACKAGE_NAME) + 1), "id");
      if(sprintf(id, "%s.%d", PACKAGE_NAME, j) < 0) tmap_bug();
      free(record_list);
      record_list = NULL;
  }
  // The loop can end with a non-NULL list and zero matches; release it here
  // the same way the loop body does (a no-op when the list is NULL).
  free(record_list);
  record_list = NULL;

  // @SQ
  if(NULL != refseq) {
      sam_header_records_t *records = NULL;
      // ZZ: No checking against existing records; the old @SQ records are
      // simply removed and replaced (the old way of checking did not work).
      records = sam_header_get_records(header, "SQ");
      if(NULL != records) {
          // ZZ: remove the headers if they exist
          sam_header_remove_records(header, "SQ");
          records = NULL;
      }
      // ZZ: Now add all new records
      for(i=0;i<refseq->num_annos;i++) { // for each reference sequence
          char num[32];
          record = sam_header_record_init("SQ"); // new reference sequence record
          if(0 == sam_header_record_add(record, "SN", refseq->annos[i].name->s)) tmap_bug(); // reference sequence name
          if(sprintf(num, "%u", (uint32_t)refseq->annos[i].len) < 0) tmap_bug(); // integer to string
          if(0 == sam_header_record_add(record, "LN", num)) tmap_bug(); // reference sequence length
          if(0 == sam_header_add_record(header, record)) tmap_bug(); // add the reference sequence record
      }
  }

  // @RG - read group
  if(0 < rg_sam_num) { // @RG specified on the command line
      // Check for SAM/BAM
      // TODO: this should be possible...
      if(io_in->type == TMAP_SEQ_TYPE_SAM || io_in->type == TMAP_SEQ_TYPE_BAM) {
          tmap_error("Cannot specify the read groups on the command line when using SAM/BAM as input."
                     " Please embed in the SAM/BAM header instead.", Exit, OutOfRange);
      }
      record = NULL;
      // go through the command line arguments
      for(i=0;i<rg_sam_num;i++) {
          if(strlen(rg_sam[i]) < 4) tmap_error("Read group too small", Exit, OutOfRange);
          if(':' != rg_sam[i][2]) tmap_error("Read group improperly formatted (no colon)", Exit, OutOfRange);

          // check for id
          if('I' == rg_sam[i][0] && 'D' == rg_sam[i][1]) { // new read group
              if(NULL != record) { // add the previous record before starting a new one
                  tmap_seqs_io_init2_fs_and_add(io_in, header, record); // add @RG.KS and @RG.FO
              }
              record = sam_header_record_init("RG"); // new read group
          }
          // add the tag/value to the record
          if(NULL == record) {
              tmap_error("The read group ID must be specified first", Exit, OutOfRange);
          }
          tag[0] = rg_sam[i][0];
          tag[1] = rg_sam[i][1];
          tag[2] = '\0'; // keep the key a valid C string
          if(0 == sam_header_record_add(record, tag, rg_sam[i]+3)) tmap_bug(); // add the tag/value
      }
      if(NULL != record) { // add the last record
          tmap_seqs_io_init2_fs_and_add(io_in, header, record); // add @RG.KS and @RG.FO
      }
      // check that the # of read groups added was the same as the # of input files...
      sam_header_records_t *records = sam_header_get_records(header, "RG"); // get the header line
      // A NULL result (no @RG records at all) is treated as a mismatch
      // rather than dereferenced.
      if(NULL == records || records->n != io_in->n) tmap_error("The number of read groups did not match the number of input files", Exit, OutOfRange);
  }
  else if(io_in->type != TMAP_SEQ_TYPE_SAM && io_in->type != TMAP_SEQ_TYPE_BAM) { // dummy...
      for(i=0;i<io_in->n;i++) { // for each input file
          char buf[32];
          record = sam_header_record_init("RG"); // new read group
          if(1 == io_in->n) strcpy(buf, "NOID");
          else if(sprintf(buf, "NOID.%d", i+1) < 0) tmap_bug();
          if(0 == sam_header_record_add(record, "ID", buf)) tmap_bug(); // dummy ID
          if(0 == sam_header_record_add(record, "SM", "NOSM")) tmap_bug(); // dummy SM, for Picard validation
          if(0 == sam_header_record_add(record, "PG", id)) tmap_bug(); // dummy PG
          tmap_seqs_io_init2_fs_and_add(io_in, header, record); // add @RG.KS and @RG.FO
      }
  }
  else { // SAM/BAM input: check that ID is present, fill in SM/PG when missing
      sam_header_records_t *records = sam_header_get_records(header, "RG"); // get the header line
      // sam_header_get_records() may return NULL (see the @SQ handling above),
      // e.g. when the input header has no @RG lines; then there is nothing to check.
      for(i=0;NULL != records && i<records->n;i++) {
          record = records->records[i];
          if(NULL == sam_header_record_get(record, "ID")) tmap_error("Missing @RG.ID in the SAM/BAM Header", Exit, OutOfRange);
          if(NULL == sam_header_record_get(record, "SM")) {
              if(0 == sam_header_record_add(record, "SM", "NOSM")) tmap_bug(); // dummy SM, for Picard validation
          }
          if(NULL == sam_header_record_get(record, "PG")) {
              if(0 == sam_header_record_add(record, "PG", id)) tmap_bug(); // dummy PG
          }
      }
  }

  // @PG - program group
  record = sam_header_record_init("PG"); // new program group
  if(0 == sam_header_record_add(record, "ID", id)) tmap_bug(); // @PG.ID
  if(0 == sam_header_record_add(record, "VN", PACKAGE_VERSION)) tmap_bug(); // @PG.VN
  // @PG.CL: join argv with single spaces; j tracks the buffer size including the terminator
  command_line = NULL;
  j = 1; // for the EOL
  command_line = tmap_realloc(command_line, sizeof(char) * j, "command_line");
  command_line[j-1] = '\0';
  for(i=0;i<argc;i++) {
      if(0 < i) j++; // room for the separating space
      j += strlen(argv[i]);
      command_line = tmap_realloc(command_line, sizeof(char) * j, "command_line");
      if(0 < i) strcat(command_line, " ");
      strcat(command_line, argv[i]);
      command_line[j-1] = '\0';
  }
  if(0 == sam_header_record_add(record, "CL", command_line)) tmap_bug(); // @PG.CL
  if(NULL != id_pp) { // a previous program ID was found above
      if(0 == sam_header_record_add(record, "PP", id_pp)) tmap_bug(); // @PG.PP
  }
  if(0 == sam_header_add_record(header, record)) tmap_bug(); // add the record
  free(command_line);

  // Check the new SAM Header
  if(0 == sam_header_check(header)) {
      tmap_error("SAM Header was not consistent", Exit, OutOfRange);
  }

  // Create a BAM Header from the SAM Header
  bam_header = bam_header_init(); // empty
  bam_header->header = header; // soft-copy the header
  bam_header = sam_header_to_bam_header(bam_header); // convert

  // free memory
  free(id);
  free(id_pp);

  return bam_header;
}