/*
 *  Rename the sample columns of a tab-delimited header line using
 *  "old-name new-name" pairs.
 *
 *  samples, nsamples:  input lines. Each must hold two names separated by
 *                      whitespace. On success the lines are split in place
 *                      (the first separator is overwritten with NUL).
 *  hdr:                header text; trailing whitespace is stripped and the
 *                      line starting at offset idx is replaced by the renamed
 *                      line followed by a single newline.
 *  idx:                offset into hdr->s of the line to rewrite.
 *
 *  Returns 1 when the header was rewritten. Returns 0, leaving both samples
 *  and hdr untouched, when at least one line is not a whitespace-separated
 *  pair.
 */
static int set_sample_pairs(char **samples, int nsamples, kstring_t *hdr, int idx)
{
    int i, j, n;

    // Are these samples "old-name new-name" pairs? Validate every line before
    // touching any of them, so that the caller still holds intact strings
    // when we bail out.
    for (i=0; i<nsamples; i++)
    {
        const char *sep = samples[i];
        while ( *sep && !isspace((unsigned char)*sep) ) sep++;
        if ( !*sep ) return 0;  // not "old-name new-name" pairs
    }

    // The hash stores pointers into samples[]; it is destroyed before returning
    void *hash = khash_str2str_init();
    for (i=0; i<nsamples; i++)
    {
        char *key, *value;
        key = value = samples[i];
        while ( *value && !isspace((unsigned char)*value) ) value++;
        *value = 0; value++;    // the separator exists, this was checked above
        while ( isspace((unsigned char)*value) ) value++;
        khash_str2str_set(hash,key,value);
    }

    while ( hdr->l>0 && isspace((unsigned char)hdr->s[hdr->l-1]) ) hdr->l--;  // remove trailing newlines
    hdr->s[hdr->l] = 0;

    // The new line is assembled in tmp, so hdr->s is not reallocated while
    // `line` points into it.
    kstring_t tmp = {0,0,0};
    char *line = hdr->s + idx;
    i = j = n = 0;      // i: scan position, j: start of current column, n: tabs seen so far
    while ( line[i] )
    {
        if ( line[i]!='\t' ) { i++; continue; }

        line[i] = 0;    // terminate the current column so it can be used as a string

        // The first nine columns are copied verbatim, only the later ones are renamed
        char *ori = ++n>9 ? khash_str2str_get(hash,line+j) : NULL;
        kputs(ori ? ori : line+j, &tmp);
        kputc('\t',&tmp);

        j = ++i;
    }

    // The last column has no trailing tab. It is eligible for renaming only
    // if nine tabs precede it, the same rule as applied inside the loop.
    char *ori = n>=9 ? khash_str2str_get(hash,line+j) : NULL;
    kputs(ori ? ori : line+j, &tmp);

    khash_str2str_destroy(hash);

    hdr->l = idx;
    kputs(tmp.s, hdr);
    kputc('\n', hdr);
    free(tmp.s);

    return 1;
}
// Example #2
// 0
/*
 *  Read a list of samples and store it in bsmpl->sample_list.
 *
 *  list:     sample list, or a file name when is_file is set (it is passed
 *            on to hts_readlist). A leading '^' is skipped and leaves
 *            bsmpl->sample_logic unchanged; without it sample_logic is set
 *            to 1.
 *  is_file:  passed on to hts_readlist.
 *
 *  Each entry is "name" or "name new-name" separated by whitespace. A
 *  backslash makes the following character literal, so names may contain
 *  whitespace. An entry is stored as name -> new-name, or name -> name when
 *  no new name is given. Entries with an empty name are ignored.
 *
 *  Returns the number of entries read, or 0 if there were none.
 */
int bam_smpl_add_samples(bam_smpl_t *bsmpl, char *list, int is_file)
{
    if ( list[0]!='^' ) bsmpl->sample_logic = 1;
    else list++;

    int i, nsamples = 0;
    char **samples = hts_readlist(list, is_file, &nsamples);
    if ( !samples || !nsamples )
    {
        free(samples);
        return 0;
    }

    kstring_t ori = {0,0,0};
    kstring_t ren = {0,0,0};

    bsmpl->sample_list = khash_str2str_init();
    for (i=0; i<nsamples; i++)
    {
        char *ptr = samples[i];
        ori.l = ren.l = 0;
        int escaped = 0;

        // First field: the original sample name
        while ( *ptr )
        {
            if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
            if ( !escaped && isspace((unsigned char)*ptr) ) break;
            kputc(*ptr, &ori);
            escaped = 0;
            ptr++;
        }

        // Optional second field: the new sample name
        if ( *ptr )
        {
            while ( isspace((unsigned char)*ptr) ) ptr++;
            while ( *ptr )
            {
                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
                if ( !escaped && isspace((unsigned char)*ptr) ) break;
                kputc(*ptr, &ren);
                escaped = 0;
                ptr++;
            }
        }

        // Resetting ori.l does not clear the buffer: with an empty name,
        // ori.s is either NULL or still holds the previous entry's text, so
        // it must not be used.
        if ( ori.l )
            khash_str2str_set(bsmpl->sample_list,strdup(ori.s),strdup(ren.l?ren.s:ori.s));
        free(samples[i]);
    }
    free(samples);
    free(ori.s);
    free(ren.s);
    return nsamples;
}
// Example #3
// 0
int bam_smpl_add_readgroups(bam_smpl_t *bsmpl, char *list, int is_file)
{
    if ( list[0]!='^' ) bsmpl->rg_logic = 1;
    else list++;

    int i, nrows  = 0;
    char **rows = hts_readlist(list, is_file, &nrows);
    if ( !nrows ) return 0;

    kstring_t fld1 = {0,0,0};
    kstring_t fld2 = {0,0,0};
    kstring_t fld3 = {0,0,0};

    bsmpl->rg_list = khash_str2str_init();
    for (i=0; i<nrows; i++)
    {
        char *ptr = rows[i];
        fld1.l = fld2.l = fld3.l = 0;
        int escaped = 0;
        while ( *ptr )
        {
            if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
            if ( isspace(*ptr) && !escaped ) break;
            kputc(*ptr, &fld1);
            escaped = 0;
            ptr++;
        }
        if ( *ptr )
        {
            while ( *ptr && isspace(*ptr) ) ptr++;
            while ( *ptr )
            {
                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
                if ( isspace(*ptr) && !escaped ) break;
                kputc(*ptr, &fld2);
                escaped = 0;
                ptr++;
            }
        }
        if ( *ptr )
        {
            while ( *ptr && isspace(*ptr) ) ptr++;
            while ( *ptr )
            {
                if ( *ptr=='\\' && !escaped ) { escaped = 1; ptr++; continue; }
                if ( isspace(*ptr) && !escaped ) break;
                kputc(*ptr, &fld3);
                escaped = 0;
                ptr++;
            }
        }
        if ( fld3.l )
        {
            // ID FILE SAMPLE
            kputc('\t',&fld1);
            kputs(fld2.s,&fld1);
            fld2.l = 0;
            kputs(fld3.s,&fld2);
        }
        // fld2.s now contains a new sample name. If NULL, use \t to keep the bam header name
        char *value = khash_str2str_get(bsmpl->rg_list,fld1.s);
        if ( !value )
            khash_str2str_set(bsmpl->rg_list,strdup(fld1.s),strdup(fld2.l?fld2.s:"\t"));
        else if ( strcmp(value,fld2.l?fld2.s:"\t") )
            error("Error: The read group \"%s\" was assigned to two different samples: \"%s\" and \"%s\"\n", fld1.s,value,fld2.l?fld2.s:"\t");
        free(rows[i]);
    }
    free(rows);
    free(fld1.s);
    free(fld2.s);
    free(fld3.s);
    return nrows;
}