/*
 *  set_sample_pairs() - rename sample columns using "old-name new-name" pairs
 *
 *  @samples:   list entries, each expected to be "old-name<whitespace>new-name"
 *  @nsamples:  number of entries in @samples
 *  @hdr:       header text; the tab-separated line starting at @idx is rewritten
 *  @idx:       offset into hdr->s of the line to rewrite
 *
 *  Each entry is split in place: a NUL byte is written over the first
 *  whitespace character, and the old/new names are registered in a temporary
 *  lookup table. The first nine tab-separated fields of the line are copied
 *  verbatim; every later field is replaced by its new name when the table has
 *  one. Trailing whitespace of @hdr is removed and the rewritten line is
 *  terminated with a single newline.
 *
 *  Returns 1 if the line was rewritten, 0 if at least one entry contains no
 *  whitespace (i.e. the list is not made of pairs). In the latter case @hdr is
 *  untouched, but note that entries preceding the offending one have already
 *  been split in place.
 */
static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int idx)
{
    int i, j, n;

    // Are these samples "old-name new-name" pairs?
    void *hash = khash_str2str_init();
    for (i=0; i<nsamples; i++)
    {
        char *key, *value;
        key = value = samples[i];

        // <ctype.h> functions require an unsigned char value; a plain char
        // may be negative for non-ASCII bytes
        while ( *value && !isspace((unsigned char)*value) ) value++;
        if ( !*value ) break;
        *value = 0; value++;
        while ( isspace((unsigned char)*value) ) value++;
        khash_str2str_set(hash,key,value);
    }
    if ( i!=nsamples )  // not "old-name new-name" pairs
    {
        khash_str2str_destroy(hash);
        return 0;
    }

    // remove trailing newlines
    while ( hdr->l>0 && isspace((unsigned char)hdr->s[hdr->l-1]) ) hdr->l--;
    hdr->s[hdr->l] = 0;

    // i: current offset, j: start of the current field, n: number of tabs seen
    kstring_t tmp = {0,0,0};
    i = j = n = 0;
    while ( hdr->s[idx+i] )
    {
        if ( hdr->s[idx+i]!='\t' ) { i++; continue; }

        hdr->s[idx+i] = 0;      // terminate the field so it can be used as a string
        char *field = hdr->s + idx + j;
        if ( ++n>9 )
        {
            // past the nine fixed columns: this is a sample name
            char *renamed = khash_str2str_get(hash,field);
            kputs(renamed ? renamed : field, &tmp);
        }
        else
            kputs(field, &tmp);
        kputc('\t',&tmp);

        j = ++i;
    }

    // The last field has no trailing tab. It is a sample column only when at
    // least nine tabs precede it; otherwise it must be copied verbatim.
    char *last = hdr->s + idx + j;
    char *renamed_last = n>=9 ? khash_str2str_get(hash,last) : NULL;
    kputs(renamed_last ? renamed_last : last, &tmp);

    if ( hash ) khash_str2str_destroy(hash);

    // Replace the original line with the rewritten one
    hdr->l = idx;
    kputs(tmp.s, hdr);
    kputc('\n', hdr);
    free(tmp.s);

    return 1;
}
int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file) { if ( list[0]!='^' ) bsmpl->sample_logic = 1; else list++; int i, nsamples = 0; char **samples = hts_readlist(list, is_file, &nsamples); if ( !nsamples ) return 0; kstring_t ori = {0,0,0}; kstring_t ren = {0,0,0}; bsmpl->sample_list = khash_str2str_init(); for (i=0; i<nsamples; i++) { char *ptr = samples[i]; ori.l = ren.l = 0; int escaped = 0; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } if ( isspace(*ptr) && !escaped ) break; kputc(*ptr, &ori); escaped = 0; ptr++; } if ( *ptr ) { while ( *ptr && isspace(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } if ( isspace(*ptr) && !escaped ) break; kputc(*ptr, &ren); escaped = 0; ptr++; } } khash_str2str_set(bsmpl->sample_list,strdup(ori.s),strdup(ren.l?ren.s:ori.s)); free(samples[i]); } free(samples); free(ori.s); free(ren.s); return nsamples; }
int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file) { if ( list[0]!='^' ) bsmpl->rg_logic = 1; else list++; int i, nrows = 0; char **rows = hts_readlist(list, is_file, &nrows); if ( !nrows ) return 0; kstring_t fld1 = {0,0,0}; kstring_t fld2 = {0,0,0}; kstring_t fld3 = {0,0,0}; bsmpl->rg_list = khash_str2str_init(); for (i=0; i<nrows; i++) { char *ptr = rows[i]; fld1.l = fld2.l = fld3.l = 0; int escaped = 0; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } if ( isspace(*ptr) && !escaped ) break; kputc(*ptr, &fld1); escaped = 0; ptr++; } if ( *ptr ) { while ( *ptr && isspace(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } if ( isspace(*ptr) && !escaped ) break; kputc(*ptr, &fld2); escaped = 0; ptr++; } } if ( *ptr ) { while ( *ptr && isspace(*ptr) ) ptr++; while ( *ptr ) { if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; } if ( isspace(*ptr) && !escaped ) break; kputc(*ptr, &fld3); escaped = 0; ptr++; } } if ( fld3.l ) { // ID FILE SAMPLE kputc('\t',&fld1); kputs(fld2.s,&fld1); fld2.l = 0; kputs(fld3.s,&fld2); } // fld2.s now contains a new sample name. If NULL, use \t to keep the bam header name char *value = khash_str2str_get(bsmpl->rg_list,fld1.s); if ( !value ) khash_str2str_set(bsmpl->rg_list,strdup(fld1.s),strdup(fld2.l?fld2.s:"\t")); else if ( strcmp(value,fld2.l?fld2.s:"\t") ) error("Error: The read group \"%s\" was assigned to two different samples: \"%s\" and \"%s\"\n", fld1.s,value,fld2.l?fld2.s:"\t"); free(rows[i]); } free(rows); free(fld1.s); free(fld2.s); free(fld3.s); return nrows; }