コード例 #1
0
ファイル: bam_reheader.c プロジェクト: BIGLabHYU/samtools
int cram_reheader_inplace(cram_fd *fd, const bam_hdr_t *h, const char *arg_list,
                         int add_PG)
{
    switch (cram_major_vers(fd)) {
    case 2: return cram_reheader_inplace2(fd, h, arg_list, add_PG);
    case 3: return cram_reheader_inplace3(fd, h, arg_list, add_PG);
    default:
        fprintf(stderr, "[%s] unsupported CRAM version %d\n", __func__,
                cram_major_vers(fd));
        return -1;
    }
}
コード例 #2
0
ファイル: bam_reheader.c プロジェクト: BIGLabHYU/samtools
/*
 * Reads a version 2 CRAM file and replaces the header in-place,
 * provided the header is small enough to fit without growing the
 * entire file.
 *
 * Version 2 format has an uncompressed SAM header with multiple nul
 * termination bytes to permit inline header editing.
 *
 * Returns 0 on success;
 *        -1 on general failure;
 *        -2 on failure due to insufficient size
 */
int cram_reheader_inplace2(cram_fd *fd, const bam_hdr_t *h, const char *arg_list,
                          int add_PG)
{
    cram_container *c = NULL;
    cram_block *b = NULL;
    SAM_hdr *hdr = NULL;
    off_t start;
    int ret = -1;

    if (cram_major_vers(fd) < 2 ||
        cram_major_vers(fd) > 3) {
        fprintf(stderr, "[%s] unsupported CRAM version %d\n", __func__,
                cram_major_vers(fd));
        goto err;
    }

    if (!(hdr = sam_hdr_parse_(h->text, h->l_text)))
        goto err;

    if (add_PG && sam_hdr_add_PG(hdr, "samtools", "VN", samtools_version(),
                                 arg_list ? "CL": NULL,
                                 arg_list ? arg_list : NULL,
                                 NULL))
        goto err;

    int header_len = sam_hdr_length(hdr);
    /* Fix M5 strings? Maybe out of scope for this tool */

    // Load the existing header
    if ((start = hseek(cram_fd_get_fp(fd), 26, SEEK_SET)) != 26)
        goto err;

    if (!(c = cram_read_container(fd)))
        goto err;

    // Version 2.1 has a single uncompressed block which is nul
    // terminated with many nuls to permit growth.
    //
    // So load old block and keep all contents identical bar the
    // header text itself
    if (!(b = cram_read_block(fd)))
        goto err;

    if (cram_block_get_uncomp_size(b) < header_len+4) {
        fprintf(stderr, "New header will not fit. Use non-inplace version (%d > %d)\n",
                header_len+4, cram_block_get_uncomp_size(b));
        ret = -2;
        goto err;
    }

    cram_block_set_offset(b, 0);   // rewind block
    int32_put_blk(b, header_len);
    cram_block_append(b, sam_hdr_str(hdr), header_len);
    // Zero the remaining block
    memset(cram_block_get_data(b)+cram_block_get_offset(b), 0,
           cram_block_get_uncomp_size(b) - cram_block_get_offset(b));
    // Make sure all sizes and byte-offsets are consistent after memset
    cram_block_set_offset(b, cram_block_get_uncomp_size(b));
    cram_block_set_comp_size(b, cram_block_get_uncomp_size(b));

    if (hseek(cram_fd_get_fp(fd), start, SEEK_SET) != start)
        goto err;

    if (cram_write_container(fd, c) == -1)
        goto err;

    if (cram_write_block(fd, b) == -1)
        goto err;

    ret = 0;
 err:
    if (c) cram_free_container(c);
    if (b) cram_free_block(b);
    if (hdr) sam_hdr_free(hdr);

    return ret;
}
コード例 #3
0
ファイル: bam_reheader.c プロジェクト: BIGLabHYU/samtools
/*
 * Reads a version 3 CRAM file and replaces the header in-place,
 * provided the header is small enough to fit without growing the
 * entire file.
 *
 * Version 3 format has a SAM header held as an (optionally)
 * compressed block within the header container.  Additional
 * uncompressed blocks or simply unallocated space (the difference
 * between total block sizes and the container size) are used to
 * provide room for growth or contraction of the compressed header.
 *
 * Returns 0 on success;
 *        -1 on general failure;
 *        -2 on failure due to insufficient size
 */
int cram_reheader_inplace3(cram_fd *fd, const bam_hdr_t *h, const char *arg_list,
                          int add_PG)
{
    cram_container *c = NULL;
    cram_block *b = NULL;
    SAM_hdr *hdr = NULL;
    off_t start, sz, end;
    int container_sz, max_container_sz;
    char *buf = NULL;
    int ret = -1;

    if (cram_major_vers(fd) < 2 ||
        cram_major_vers(fd) > 3) {
        fprintf(stderr, "[%s] unsupported CRAM version %d\n", __func__,
                cram_major_vers(fd));
        goto err;
    }

    if (!(hdr = sam_hdr_parse_(h->text, h->l_text)))
        goto err;

    if (add_PG && sam_hdr_add_PG(hdr, "samtools", "VN", samtools_version(),
                                 arg_list ? "CL": NULL,
                                 arg_list ? arg_list : NULL,
                                 NULL))
        goto err;

    int header_len = sam_hdr_length(hdr);
    /* Fix M5 strings? Maybe out of scope for this tool */

    // Find current size of SAM header block
    if ((start = hseek(cram_fd_get_fp(fd), 26, SEEK_SET)) != 26)
        goto err;

    if (!(c = cram_read_container(fd)))
        goto err;

    // +5 allows num_landmarks to increase from 0 to 1 (Cramtools)
    max_container_sz = cram_container_size(c)+5;

    sz = htell(cram_fd_get_fp(fd)) + cram_container_get_length(c) - start;
    end = htell(cram_fd_get_fp(fd)) + cram_container_get_length(c);

    // We force 1 block instead of (optionally) 2.  C CRAM
    // implementations for v3 were writing 1 compressed block followed
    // by 1 uncompressed block.  However this is tricky to deal with
    // as changing block sizes can mean the block header also changes
    // size due to itf8 and variable size integers.
    //
    // If we had 1 block, this doesn't change anything.
    // If we had 2 blocks, the new container header will be smaller by
    // 1+ bytes, requiring the cram_container_get_length(c) to be larger in value.
    // However this is an int32 instead of itf8 so the container
    // header structure stays the same size.  This means we can always
    // reduce the number of blocks without running into size problems.
    cram_container_set_num_blocks(c, 1);
    int32_t *landmark;
    int32_t num_landmarks;
    landmark = cram_container_get_landmarks(c, &num_landmarks);
    if (num_landmarks && landmark) {
        num_landmarks = 1;
        landmark[0] = 0;
    } else {
        num_landmarks = 0;
    }
    cram_container_set_landmarks(c, num_landmarks, landmark);

    buf = malloc(max_container_sz);
    container_sz = max_container_sz;
    if (cram_store_container(fd, c, buf, &container_sz) != 0)
        goto err;

    if (!buf)
        goto err;

    // Proposed new length, but changing cram_container_get_length(c) may change the
    // container_sz and thus the remainder (cram_container_get_length(c) itself).
    cram_container_set_length(c, sz - container_sz);

    int old_container_sz = container_sz;
    container_sz = max_container_sz;
    if (cram_store_container(fd, c, buf, &container_sz) != 0)
        goto err;

    if (old_container_sz != container_sz) {
        fprintf(stderr, "Quirk of fate makes this troublesome! "
                "Please use non-inplace version.\n");
        goto err;
    }



    // Version 3.0 supports compressed header
    b = cram_new_block(FILE_HEADER, 0);
    int32_put_blk(b, header_len);
    cram_block_append(b, sam_hdr_str(hdr), header_len);
    cram_block_update_size(b);

    cram_compress_block(fd, b, NULL, -1, -1);

    if (hseek(cram_fd_get_fp(fd), 26, SEEK_SET) != 26)
        goto err;

    if (cram_block_size(b) > cram_container_get_length(c)) {
        fprintf(stderr, "New header will not fit. Use non-inplace version"
                " (%d > %d)\n",
                (int)cram_block_size(b), cram_container_get_length(c));
        ret = -2;
        goto err;
    }

    if (cram_write_container(fd, c) == -1)
        goto err;

    if (cram_write_block(fd, b) == -1)
        goto err;

    // Blank out the remainder
    int rsz = end - htell(cram_fd_get_fp(fd));
    assert(rsz >= 0);
    if (rsz) {
        char *rem = calloc(1, rsz);
        ret = hwrite(cram_fd_get_fp(fd), rem, rsz) == rsz ? 0 : -1;
        free(rem);
    }

 err:
    if (c) cram_free_container(c);
    if (buf) free(buf);
    if (b) cram_free_block(b);
    if (hdr) sam_hdr_free(hdr);

    return ret;
}
コード例 #4
0
ファイル: bam_cat.c プロジェクト: dozy/samtools
/*
 * Check the files are consistent and capable of being concatenated.
 * Also fills out the rg2id read-group hash and the version numbers
 * and produces a new bam_hdr_t structure with merged RG lines.
 * Note it is only a simple merge, as we lack the niceties of a proper
 * header API.
 *
 * Returns updated header on success;
 *        NULL on failure.
 */
static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t *h,
                                     khash_s2i **rg2id, int *vers_maj_p, int *vers_min_p) {
    int i, vers_maj = -1, vers_min = -1;
    bam_hdr_t *new_h = NULL;

    if (h) {
        new_h = bam_hdr_dup(h);
        *rg2id = hash_rg(new_h);
    }

    for (i = 0; i < nfn; ++i) {
        samFile *in;
        cram_fd *in_c;
        khint_t ki;
        int new_rg = -1;

        in = sam_open(fn[i], "rc");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
            return NULL;
        }
        in_c = in->fp.cram;

        int vmaj = cram_major_vers(in_c);
        int vmin = cram_minor_vers(in_c);
        if ((vers_maj != -1 && vers_maj != vmaj) ||
            (vers_min != -1 && vers_min != vmin)) {
            fprintf(stderr, "[%s] ERROR: input files have differing version numbers.\n",
                    __func__);
            return NULL;
        }
        vers_maj = vmaj;
        vers_min = vmin;

        bam_hdr_t *old = sam_hdr_read(in);
        khash_s2i *rg2id_in = hash_rg(old);

        if (!new_h) {
            new_h = bam_hdr_dup(old);
            *rg2id = hash_rg(new_h);
        }

        // Add any existing @RG entries to our global @RG hash.
        for (ki = 0; ki < rg2id_in->n_id; ki++) {
            int added;

            new_rg = hash_s2i_inc(*rg2id, rg2id_in->id[ki], rg2id_in->line[ki], &added);
            //fprintf(stderr, "RG %s: #%d -> #%d\n",
            //        rg2id_in->id[ki], ki, new_rg);

            if (added) {
                // Also add to new_h
                const char *line = rg2id_in->line[ki];
                const char *line_end = line;
                while (*line && *line_end++ != '\n')
                    ;
                new_h->l_text += line_end - line;
                new_h->text = realloc(new_h->text, new_h->l_text+1);
                strncat(&new_h->text[new_h->l_text - (line_end - line)],
                        line, line_end - line);
            }

            if (new_rg != ki && rg2id_in->n_id > 1) {
                fprintf(stderr, "[%s] ERROR: Same size @RG lists but differing order / contents\n",
                        __func__);
                return NULL;
            }
        }

        hash_s2i_free(rg2id_in);
        bam_hdr_destroy(old);
        sam_close(in);
    }

    *vers_maj_p = vers_maj;
    *vers_min_p = vers_min;

    return new_h;
}