int beds_database_add(struct beds_options *opts, const char *fname, char *columns) { if ( opts->n_files == opts->m_files ) { opts->m_files = opts->m_files == 0 ? 2 : opts->m_files +2; opts->files = (struct beds_anno_file*)realloc(opts->files, opts->m_files*sizeof(struct beds_anno_file)); } struct beds_anno_file *file = &opts->files[opts->n_files]; memset(file, 0, sizeof(struct beds_anno_file)); file->id = opts->n_files; file->fname = strdup(fname); file->fp = hts_open(fname, "r"); if (file->fp == NULL) error("Failed to open %s : %s", fname, strerror(errno)); // int n; file->idx = tbx_index_load(fname); if ( file->idx == NULL) error("Failed to load index of %s.", fname); opts->n_files++; file->last_id = -1; file->last_start = -1; file->last_end = -1; kstring_t string = KSTRING_INIT; int no_columns = 0; int i; if ( columns == NULL && file->no_such_chrom == 0) { warnings("No columns string specified for %s. Will annotate all tags in this data.", fname); file->no_such_chrom = 1; no_columns = 1; } else { int *splits = NULL; kputs(columns, &string); int nfields; splits = ksplit(&string, ',', &nfields); file->m_cols = nfields; file->cols = (struct anno_col*)malloc(sizeof(struct anno_col) * file->m_cols); for ( i = 0; i < nfields; ++i ) { char *ss = string.s + splits[i]; struct anno_col *col = &file->cols[file->n_cols]; col->icol = i; col->replace = REPLACE_MISSING; if (*ss == '+') { col->replace = REPLACE_MISSING; ss++; } else if ( *ss == '-' ) { col->replace = REPLACE_EXISTING; ss++; } if (ss[0] == '\0') continue; if ( strncmp(ss, "INFO/", 5) == 0) ss += 5; col->hdr_key = strdup(ss); col->icol = -1; // debug_print("%s, %d", col->hdr_key, file->n_cols); file->n_cols++; } string.l = 0; } while (1) { string.l =0; if ( hts_getline(file->fp, KS_SEP_LINE, &string) < 0 ) break; // only accept header line in the beginning for file if ( string.s[0] != '#' ) break; if ( strncmp(string.s, "##INFO=", 7) == 0) { char *ss = string.s + 11; char *se = ss; while (se && *se != ',') se++; 
struct anno_col *col = NULL; // if no column string specified, init all header lines if ( no_columns ) { if ( file->n_cols == file->m_cols ) { file->m_cols = file->m_cols == 0 ? 2 : file->m_cols + 2; file->cols = (struct anno_col *) realloc(file->cols, file->m_cols*sizeof(struct anno_col)); } col = &file->cols[file->n_cols++]; col->icol = -1; col->hdr_key = strndup(ss, se-ss+1); col->hdr_key[se-ss] = '\0'; } else { for ( i = 0; i < file->n_cols; ++i ) { if ( strncmp(file->cols[i].hdr_key, ss, se-ss) == 0) break; } // if header line is not set in the column string, skip if ( i == file->n_cols ) continue; col = &file->cols[i]; } // specify setter functions here col->setter.bed = beds_setter_info_string; bcf_hdr_append(opts->hdr_out, string.s); bcf_hdr_sync(opts->hdr_out); int hdr_id = bcf_hdr_id2int(opts->hdr_out, BCF_DT_ID,col->hdr_key); assert ( bcf_hdr_idinfo_exists(opts->hdr_out, BCF_HL_INFO, hdr_id) ); } string.l = 0; // set column number for each col if ( strncasecmp(string.s, "#chr", 4) == 0) { int nfields; int *splits = ksplit(&string, '\t', &nfields); if (nfields < 4) { fprintf(stderr, "[error] Bad header of bed database : %s. 
n_fields : %d, %s", fname, nfields, string.s); fprintf(stderr, "[notice] this error usually happened because the header line is seperated by spaces but not tab!"); exit(1); } int k; for ( k = 3; k < nfields; ++k ) { char *ss = string.s + splits[k]; for (i = 0; i < file->n_cols; ++i ) { struct anno_col *col = &file->cols[i]; if ( strcmp(col->hdr_key, ss) == 0) break; } // if name line specify more names than column string or header, skip if ( i == file->n_cols ) continue; struct anno_col *col = &file->cols[i]; col->icol = k; } } } for ( i = 0; i < file->n_cols; ++i ) { struct anno_col *col = &file->cols[i]; if ( col->hdr_key && col->icol == -1 ) error("No column %s found in bed database : %s", col->hdr_key, fname); int hdr_id = bcf_hdr_id2int(opts->hdr_out, BCF_DT_ID, col->hdr_key); assert(hdr_id>-1); col->number = bcf_hdr_id2length(opts->hdr_out, BCF_HL_INFO, hdr_id); if ( col->number == BCF_VL_A || col->number == BCF_VL_R || col->number == BCF_VL_G) error("Only support fixed INFO number for bed database. %s", col->hdr_key); col->ifile = file->id; } if ( string.m ) free(string.s); if ( opts->beds_is_inited == 0 ) opts->beds_is_inited = 1; return 0; }
/*
 * Remove alleles from a VCF/BCF record and trim every allele-dependent field.
 *
 * header   header describing `line`.
 * line     record to modify in place; it must already be unpacked far enough
 *          for line->d.allele, line->d.info and line->d.fmt to be readable.
 * rm_mask  bit i set means "drop allele i" (i is the index in the original
 *          allele list). The allele loop starts at 1, so bit 0 (REF) is
 *          expected to be clear.
 *
 * Steps, in order:
 *   1. rebuild the allele string without the removed alleles;
 *   2. compact Number=A/R/G INFO values;
 *   3. renumber GT allele indexes when any surviving index changed;
 *   4. compact Number=A/R/G FORMAT values (haploid or diploid samples).
 *
 * Returns nothing. Nothing is changed when the mask selects no ALT allele.
 * Failing htslib calls print a message and exit(1).
 */
void bcf_remove_alleles(const bcf_hdr_t *header, bcf1_t *line, int rm_mask)
{
    int *map = (int*) calloc(line->n_allele, sizeof(int));

    // create map of indexes from old to new ALT numbering and modify ALT
    kstring_t str = {0,0,0};
    kputs(line->d.allele[0], &str);

    int nrm = 0, i, j;  // i: ori alleles, j: new alleles
    for (i=1, j=1; i<line->n_allele; i++)
    {
        if ( rm_mask & 1<<i )
        {
            // remove this allele
            line->d.allele[i] = NULL;
            nrm++;
            continue;
        }
        kputc(',', &str);
        kputs(line->d.allele[i], &str);
        map[i] = j;
        j++;
    }
    if ( !nrm ) { free(map); free(str.s); return; }

    int nR_ori = line->n_allele;
    int nR_new = line->n_allele-nrm;
    assert(nR_new > 0); // should not be able to remove reference allele
    int nA_ori = nR_ori-1;
    int nA_new = nR_new-1;

    int nG_ori = nR_ori*(nR_ori + 1)/2;
    int nG_new = nR_new*(nR_new + 1)/2;

    bcf_update_alleles_str(header, line, str.s);

    // remove from Number=G, Number=R and Number=A INFO fields.
    uint8_t *dat = NULL;
    int mdat = 0, ndat = 0, mdat_bytes = 0, nret;
    for (i=0; i<line->n_info; i++)
    {
        bcf_info_t *info = &line->d.info[i];
        int vlen = bcf_hdr_id2length(header,BCF_HL_INFO,info->key);

        if ( vlen!=BCF_VL_A && vlen!=BCF_VL_G && vlen!=BCF_VL_R ) continue; // no need to change

        int type = bcf_hdr_id2type(header,BCF_HL_INFO,info->key);
        if ( type==BCF_HT_FLAG ) continue;
        int size = 1;
        if ( type==BCF_HT_REAL || type==BCF_HT_INT ) size = 4;

        // dat is shared between types: keep its capacity in bytes and hand it
        // over in units of the current element size
        mdat = mdat_bytes / size;
        nret = bcf_get_info_values(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void**)&dat, &mdat, type);
        mdat_bytes = mdat * size;
        if ( nret<0 )
        {
            fprintf(stderr,"[%s:%d %s] Could not access INFO/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
                    bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname(header,line), line->pos+1, nret);
            exit(1);
        }
        if ( type==BCF_HT_STR )
        {
            // string values: rebuild the comma separated list in str
            str.l = 0;
            char *ss = (char*) dat, *se = (char*) dat;
            if ( vlen==BCF_VL_A || vlen==BCF_VL_R )
            {
                int nexp, inc = 0;
                if ( vlen==BCF_VL_A )
                {
                    nexp = nA_ori;
                    inc  = 1;   // Number=A value j belongs to allele j+1
                }
                else
                    nexp = nR_ori;
                for (j=0; j<nexp; j++)
                {
                    if ( !*se ) break;
                    while ( *se && *se!=',' ) se++;
                    if ( rm_mask & 1<<(j+inc) )
                    {
                        if ( *se ) se++;
                        ss = se;
                        continue;
                    }
                    if ( str.l ) kputc(',',&str);
                    kputsn(ss,se-ss,&str);
                    if ( *se ) se++;
                    ss = se;
                }
                assert( j==nexp );
            }
            else    // Number=G, assuming diploid genotype
            {
                int k = 0, n = 0;
                for (j=0; j<nR_ori; j++)
                {
                    for (k=0; k<=j; k++)
                    {
                        if ( !*se ) break;
                        while ( *se && *se!=',' ) se++;
                        n++;
                        if ( rm_mask & 1<<j || rm_mask & 1<<k )
                        {
                            if ( *se ) se++;
                            ss = se;
                            continue;
                        }
                        if ( str.l ) kputc(',',&str);
                        kputsn(ss,se-ss,&str);
                        if ( *se ) se++;
                        ss = se;
                    }
                    if ( !*se ) break;
                }
                // one value per genotype must have been seen (was an assignment)
                assert( n==nG_ori );
            }

            nret = bcf_update_info(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void*)str.s, str.l, type);
            if ( nret<0 )
            {
                fprintf(stderr,"[%s:%d %s] Could not update INFO/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
                        bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname(header,line), line->pos+1, nret);
                exit(1);
            }
            continue;
        }

        if ( vlen==BCF_VL_A || vlen==BCF_VL_R )
        {
            int inc = 0, ntop;
            if ( vlen==BCF_VL_A )
            {
                assert( nret==nA_ori );
                ntop = nA_ori;
                ndat = nA_new;
                inc  = 1;
            }
            else
            {
                assert( nret==nR_ori );
                ntop = nR_ori;
                ndat = nR_new;
            }
            int k = 0;

            #define BRANCH(type_t,is_vector_end) \
            { \
                type_t *ptr = (type_t*) dat; \
                int size = sizeof(type_t); \
                for (j=0; j<ntop; j++) /* j:ori, k:new */ \
                { \
                    if ( is_vector_end ) { if ( j!=k ) memcpy(dat+k*size, dat+j*size, size); break; } \
                    if ( rm_mask & 1<<(j+inc) ) continue; \
                    if ( j!=k ) memcpy(dat+k*size, dat+j*size, size); \
                    k++; \
                } \
            }
            switch (type)
            {
                case BCF_HT_INT:  BRANCH(int32_t,ptr[j]==bcf_int32_vector_end); break;
                case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr[j])); break;
            }
            #undef BRANCH
        }
        else    // Number=G
        {
            assert( nret==nG_ori );
            int k, l_ori = -1, l_new = 0;
            ndat = nG_new;

            #define BRANCH(type_t,is_vector_end) \
            { \
                type_t *ptr = (type_t*) dat; \
                int size = sizeof(type_t); \
                for (j=0; j<nR_ori; j++) \
                { \
                    for (k=0; k<=j; k++) \
                    { \
                        l_ori++; \
                        if ( is_vector_end ) { if ( l_ori!=l_new ) memcpy(dat+l_new*size, dat+l_ori*size, size); break; } \
                        if ( rm_mask & 1<<j || rm_mask & 1<<k ) continue; \
                        if ( l_ori!=l_new ) memcpy(dat+l_new*size, dat+l_ori*size, size); \
                        l_new++; \
                    } \
                } \
            }
            switch (type)
            {
                case BCF_HT_INT:  BRANCH(int32_t,ptr[l_ori]==bcf_int32_vector_end); break;
                case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr[l_ori])); break;
            }
            #undef BRANCH
        }

        nret = bcf_update_info(header, line, bcf_hdr_int2id(header,BCF_DT_ID,info->key), (void*)dat, ndat, type);
        if ( nret<0 )
        {
            fprintf(stderr,"[%s:%d %s] Could not update INFO/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
                    bcf_hdr_int2id(header,BCF_DT_ID,info->key), bcf_seqname(header,line), line->pos+1, nret);
            exit(1);
        }
    }

    // Update GT fields, the allele indexes might have changed
    for (i=1; i<line->n_allele; i++) if ( map[i]!=i ) break;
    if ( i<line->n_allele )
    {
        mdat = mdat_bytes / 4;  // sizeof(int32_t)
        nret = bcf_get_genotypes(header,line,(void**)&dat,&mdat);
        mdat_bytes = mdat * 4;
        if ( nret>0 )
        {
            nret /= line->n_sample;
            int32_t *ptr = (int32_t*) dat;
            for (i=0; i<line->n_sample; i++)
            {
                for (j=0; j<nret; j++)
                {
                    if ( ptr[j]==bcf_gt_missing ) continue;
                    if ( ptr[j]==bcf_int32_vector_end ) break;
                    int al = bcf_gt_allele(ptr[j]);
                    assert( al<nR_ori && map[al]>=0 );
                    // re-encode with the new allele index, keeping the phase bit
                    ptr[j] = (map[al]+1)<<1 | (ptr[j]&1);
                }
                ptr += nret;
            }
            bcf_update_genotypes(header, line, (void*)dat, nret*line->n_sample);
        }
    }

    // Remove from Number=G, Number=R and Number=A FORMAT fields.
    // Assuming haploid or diploid GTs
    for (i=0; i<line->n_fmt; i++)
    {
        bcf_fmt_t *fmt = &line->d.fmt[i];
        int vlen = bcf_hdr_id2length(header,BCF_HL_FMT,fmt->id);

        if ( vlen!=BCF_VL_A && vlen!=BCF_VL_G && vlen!=BCF_VL_R ) continue; // no need to change

        int type = bcf_hdr_id2type(header,BCF_HL_FMT,fmt->id);
        if ( type==BCF_HT_FLAG ) continue;

        int size = 1;
        if ( type==BCF_HT_REAL || type==BCF_HT_INT ) size = 4;

        mdat = mdat_bytes / size;
        nret = bcf_get_format_values(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void**)&dat, &mdat, type);
        mdat_bytes = mdat * size;
        if ( nret<0 )
        {
            fprintf(stderr,"[%s:%d %s] Could not access FORMAT/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
                    bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname(header,line), line->pos+1, nret);
            exit(1);
        }

        if ( type==BCF_HT_STR )
        {
            int nps = nret/line->n_sample;     // number of bytes per sample
            str.l = 0;
            if ( vlen==BCF_VL_A || vlen==BCF_VL_R )
            {
                int nexp, inc = 0;
                if ( vlen==BCF_VL_A )
                {
                    nexp = nA_ori;
                    inc  = 1;
                }
                else
                    nexp = nR_ori;
                for (j=0; j<line->n_sample; j++)
                {
                    char *ss = ((char*)dat) + j*nps, *se = ss + nps, *ptr = ss;
                    int k_src = 0, k_dst = 0, l = str.l;
                    for (k_src=0; k_src<nexp; k_src++)
                    {
                        if ( ptr>=se || !*ptr ) break;
                        while ( ptr<se && *ptr && *ptr!=',' ) ptr++;
                        if ( rm_mask & 1<<(k_src+inc) )
                        {
                            ss = ++ptr;
                            continue;
                        }
                        if ( k_dst ) kputc(',',&str);
                        kputsn(ss,ptr-ss,&str);
                        ss = ++ptr;
                        k_dst++;
                    }
                    assert( k_src==nexp );
                    // pad with NULs so that every sample keeps the same width
                    l = str.l - l;
                    for (; l<nps; l++) kputc(0, &str);
                }
            }
            else    // Number=G, diploid or haploid
            {
                for (j=0; j<line->n_sample; j++)
                {
                    char *ss = ((char*)dat) + j*nps, *se = ss + nps, *ptr = ss;
                    int k_src = 0, k_dst = 0, l = str.l;
                    int nexp = 0; // diploid or haploid?
                    while ( ptr<se )
                    {
                        if ( !*ptr ) break;
                        if ( *ptr==',' ) nexp++;
                        ptr++;
                    }
                    if ( ptr!=ss ) nexp++;
                    assert( nexp==nG_ori || nexp==nR_ori );
                    ptr = ss;
                    if ( nexp==nG_ori ) // diploid
                    {
                        int ia, ib;
                        for (ia=0; ia<nR_ori; ia++)
                        {
                            for (ib=0; ib<=ia; ib++)
                            {
                                if ( ptr>=se || !*ptr ) break;
                                while ( ptr<se && *ptr && *ptr!=',' ) ptr++;
                                if ( rm_mask & 1<<ia || rm_mask & 1<<ib )
                                {
                                    ss = ++ptr;
                                    continue;
                                }
                                if ( k_dst ) kputc(',',&str);
                                kputsn(ss,ptr-ss,&str);
                                ss = ++ptr;
                                k_dst++;
                            }
                            if ( ptr>=se || !*ptr ) break;
                        }
                    }
                    else    // haploid
                    {
                        for (k_src=0; k_src<nR_ori; k_src++)
                        {
                            if ( ptr>=se || !*ptr ) break;
                            while ( ptr<se && *ptr && *ptr!=',' ) ptr++;
                            if ( rm_mask & 1<<k_src )
                            {
                                ss = ++ptr;
                                continue;
                            }
                            if ( k_dst ) kputc(',',&str);
                            kputsn(ss,ptr-ss,&str);
                            ss = ++ptr;
                            k_dst++;
                        }
                        assert( k_src==nR_ori );
                    }
                    // Pad diploid and haploid samples alike: the update below
                    // is given one flat buffer for all samples, so each sample
                    // must occupy the same number of bytes.
                    l = str.l - l;
                    for (; l<nps; l++) kputc(0, &str);
                }
            }
            nret = bcf_update_format(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void*)str.s, str.l, type);
            if ( nret<0 )
            {
                fprintf(stderr,"[%s:%d %s] Could not update FORMAT/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
                        bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname(header,line), line->pos+1, nret);
                exit(1);
            }
            continue;
        }

        int nori = nret / line->n_sample;
        if ( vlen==BCF_VL_A || vlen==BCF_VL_R || (vlen==BCF_VL_G && nori==nR_ori) ) // Number=A, R or haploid Number=G
        {
            int ntop, nnew, inc = 0;
            if ( vlen==BCF_VL_A )
            {
                assert( nori==nA_ori );     // todo: will fail if all values are missing
                ntop = nA_ori;
                nnew = nA_new;
                inc  = 1;
            }
            else
            {
                assert( nori==nR_ori );     // todo: will fail if all values are missing
                ntop = nR_ori;
                nnew = nR_new;
            }
            ndat = nnew*line->n_sample;

            /* Values are compacted in place. The destination of sample j starts
               at j*nnew (the per-sample width after removal), and a copy is
               needed whenever source and destination addresses differ, which
               is the case for every sample but the first even when
               k_src==k_dst. */
            #define BRANCH(type_t,is_vector_end) \
            { \
                for (j=0; j<line->n_sample; j++) \
                { \
                    type_t *ptr_src = ((type_t*)dat) + j*nori; \
                    type_t *ptr_dst = ((type_t*)dat) + j*nnew; \
                    int size = sizeof(type_t); \
                    int k_src, k_dst = 0; \
                    for (k_src=0; k_src<ntop; k_src++) \
                    { \
                        if ( is_vector_end ) \
                        { \
                            if ( ptr_dst+k_dst != ptr_src+k_src ) memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
                            break; \
                        } \
                        if ( rm_mask & 1<<(k_src+inc) ) continue; \
                        if ( ptr_dst+k_dst != ptr_src+k_src ) memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
                        k_dst++; \
                    } \
                } \
            }
            switch (type)
            {
                case BCF_HT_INT:  BRANCH(int32_t,ptr_src[k_src]==bcf_int32_vector_end); break;
                case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr_src[k_src])); break;
            }
            #undef BRANCH
        }
        else    // Number=G, diploid or mixture of haploid+diploid
        {
            assert( nori==nG_ori );
            ndat = nG_new*line->n_sample;

            #define BRANCH(type_t,is_vector_end) \
            { \
                for (j=0; j<line->n_sample; j++) \
                { \
                    type_t *ptr_src = ((type_t*)dat) + j*nori; \
                    type_t *ptr_dst = ((type_t*)dat) + j*nG_new; \
                    int size = sizeof(type_t); \
                    int ia, ib, k_dst = 0, k_src; \
                    int nset = 0;   /* haploid or diploid? */ \
                    for (k_src=0; k_src<nG_ori; k_src++) { if ( is_vector_end ) break; nset++; } \
                    if ( nset==nR_ori ) /* haploid */ \
                    { \
                        for (k_src=0; k_src<nR_ori; k_src++) \
                        { \
                            if ( rm_mask & 1<<k_src ) continue; \
                            if ( ptr_dst+k_dst != ptr_src+k_src ) memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
                            k_dst++; \
                        } \
                        /* carry over the value following the haploid entries */ \
                        if ( ptr_dst+k_dst != ptr_src+k_src ) memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
                    } \
                    else /* diploid */ \
                    { \
                        k_src = -1; \
                        for (ia=0; ia<nR_ori; ia++) \
                        { \
                            for (ib=0; ib<=ia; ib++) \
                            { \
                                k_src++; \
                                if ( is_vector_end ) \
                                { \
                                    if ( ptr_dst+k_dst != ptr_src+k_src ) memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
                                    ia = nR_ori; \
                                    break; \
                                } \
                                if ( rm_mask & 1<<ia || rm_mask & 1<<ib ) continue; \
                                if ( ptr_dst+k_dst != ptr_src+k_src ) memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \
                                k_dst++; \
                            } \
                        } \
                    } \
                } \
            }
            switch (type)
            {
                case BCF_HT_INT:  BRANCH(int32_t,ptr_src[k_src]==bcf_int32_vector_end); break;
                case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr_src[k_src])); break;
            }
            #undef BRANCH
        }
        nret = bcf_update_format(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void*)dat, ndat, type);
        if ( nret<0 )
        {
            fprintf(stderr,"[%s:%d %s] Could not update FORMAT/%s at %s:%d [%d]\n", __FILE__,__LINE__,__FUNCTION__,
                    bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname(header,line), line->pos+1, nret);
            exit(1);
        }
    }
    free(dat);
    free(str.s);
    free(map);
}
// only if annotation database is VCF/BCF file, header_in has values or else header_in == NULL anno_col_t *init_columns(const char *rules, bcf_hdr_t *header_in, bcf_hdr_t *header_out, int *ncols, enum anno_type type) { assert(rules != NULL); if (type == anno_is_vcf && header_in == NULL) { error("Inconsistent file type!"); } char *ss = (char*)rules, *se = ss; int nc = 0; anno_col_t *cols = NULL; kstring_t tmp = KSTRING_INIT; kstring_t str = KSTRING_INIT; int i = -1; while (*ss) { if ( *se && *se!=',' ) { se++; continue; } int replace = REPLACE_ALL; if ( *ss=='+') { replace = REPLACE_MISSING; ss++; } else if (*ss=='-') { replace = REPLACE_EXISTING; ss++; } i++; str.l = 0; kputsn(ss, se-ss, &str); if ( !str.s[0] ) { warnings("Empty tag in %s", rules); } else if ( !strcasecmp("CHROM", str.s) || !strcasecmp("POS", str.s) || !strcasecmp("FROM", str.s) || !strcasecmp("TO", str.s) || !strcasecmp("REF", str.s) || !strcasecmp("ALT", str.s) || !strcasecmp("FILTER", str.s) || !strcasecmp("QUAL", str.s)) { warnings("Skip tag %s", str.s); } else if ( !strcasecmp("ID", str.s) ) { nc++; cols = (struct anno_col*) realloc(cols, sizeof(struct anno_col)* (nc)); struct anno_col *col = &cols[nc-1]; col->icol = i; col->replace = replace; col->setter = type == anno_is_vcf ? vcf_setter_id : setter_id; col->hdr_key = strdup(str.s); } else if (!strcasecmp("INFO", str.s) || !strcasecmp("FORMAT", str.s) ) { error("do not support annotate all INFO,FORMAT fields. todo INFO/TAG instead\n"); } else if (!strncasecmp("FORMAT/", str.s, 7) || !strncasecmp("FMT/", str.s, 4)) { char *key = str.s + (!strncasecmp("FMT", str.s, 4) ? 
4 : 7); if (!strcasecmp("GT", key)) error("It is not allowed to change GT tag."); int hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); if ( !bcf_hdr_idinfo_exists(header_out, BCF_HL_FMT, hdr_id) ) { if ( type == anno_is_vcf ) { bcf_hrec_t *hrec = bcf_hdr_get_hrec(header_in, BCF_HL_FMT, "ID", str.s, NULL); if ( !hrec ) error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); tmp.l = 0; bcf_hrec_format(hrec, &tmp); bcf_hdr_append(header_out, tmp.s); bcf_hdr_sync(header_out); hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); assert( bcf_hdr_idinfo_exists(header_out, BCF_HL_FMT, hdr_id) ); } else { error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); } } //int hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, key); nc++; cols = (struct anno_col*) realloc(cols, sizeof(struct anno_col)*(nc)); struct anno_col *col = &cols[nc-1]; col->icol = -1; col->replace = replace; col->hdr_key = strdup(key); switch ( bcf_hdr_id2type(header_out, BCF_HL_FMT, hdr_id) ) { case BCF_HT_INT: col->setter = type == anno_is_vcf ? vcf_setter_format_int : setter_format_int; break; case BCF_HT_REAL: col->setter = type == anno_is_vcf ? vcf_setter_format_real : setter_format_real; break; case BCF_HT_STR: col->setter = type == anno_is_vcf ? 
vcf_setter_format_str : setter_format_str; break; default : error("The type of %s not recognised (%d)\n", str.s, bcf_hdr_id2type(header_out, BCF_HL_FMT, hdr_id)); } } else if ( !strncasecmp("INFO/", str.s, 5) ) { memmove(str.s, str.s+5, str.l-4); str.l -= 4; int hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); if ( !bcf_hdr_idinfo_exists(header_out, BCF_HL_INFO, hdr_id) ) { if ( type == anno_is_vcf ) { bcf_hrec_t *hrec = bcf_hdr_get_hrec(header_in, BCF_HL_INFO, "ID", str.s, NULL); if ( !hrec ) error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); tmp.l = 0; bcf_hrec_format(hrec, &tmp); bcf_hdr_append(header_out, tmp.s); bcf_hdr_sync(header_out); hdr_id = bcf_hdr_id2int(header_out, BCF_DT_ID, str.s); assert( bcf_hdr_idinfo_exists(header_out, BCF_HL_INFO, hdr_id) ); } else { error("The tag \"%s\" is not defined in header: %s\n", str.s, rules); } } nc++; cols = (struct anno_col*) realloc(cols, sizeof(struct anno_col)*(nc)); struct anno_col *col = &cols[nc-1]; col->icol = i; col->replace = replace; col->hdr_key = strdup(str.s); col->number = bcf_hdr_id2length(header_out, BCF_HL_INFO, hdr_id); switch ( bcf_hdr_id2type(header_out, BCF_HL_INFO, hdr_id) ) { case BCF_HT_FLAG: col->setter = type == anno_is_vcf ? vcf_setter_info_flag : setter_info_flag; break; case BCF_HT_INT: col->setter = type == anno_is_vcf ? vcf_setter_info_int : setter_info_int; break; case BCF_HT_REAL: col->setter = type == anno_is_vcf ? vcf_setter_info_real : setter_info_real; break; case BCF_HT_STR: col->setter = type == anno_is_vcf ? vcf_setter_info_str : setter_info_str; break; default: error("The type of %s not recognised (%d)\n", str.s, bcf_hdr_id2type(header_out, BCF_HL_INFO, hdr_id)); } } if ( !*se ) break; ss = ++se; } *ncols = nc; if (str.m) free(str.s); if (tmp.m) free(tmp.s); return cols; }