コード例 #1
0
ファイル: bam_sort.c プロジェクト: chapmanb/samtools
int bam_merge(int argc, char *argv[])
{
#ifndef _PBGZF_USE 
	int c, is_by_qname = 0, flag = 0, ret = 0, n_threads = 0, level = -1;
#else
	int c, is_by_qname = 0, flag = 0, ret = 0, level = -1;
#endif
	char *fn_headers = NULL, *reg = 0;

	while ((c = getopt(argc, argv, "h:nru1R:f@:l:")) >= 0) {
		switch (c) {
		case 'r': flag |= MERGE_RG; break;
		case 'f': flag |= MERGE_FORCE; break;
		case 'h': fn_headers = strdup(optarg); break;
		case 'n': is_by_qname = 1; break;
		case '1': flag |= MERGE_LEVEL1; break;
		case 'u': flag |= MERGE_UNCOMP; break;
		case 'R': reg = strdup(optarg); break;
		case 'l': level = atoi(optarg); break;
#ifndef _PBGZF_USE 
		case '@': n_threads = atoi(optarg); break;
#endif
		}
	}
	if (optind + 2 >= argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage:   samtools merge [-nr] [-h inh.sam] <out.bam> <in1.bam> <in2.bam> [...]\n\n");
		fprintf(stderr, "Options: -n       sort by read names\n");
		fprintf(stderr, "         -r       attach RG tag (inferred from file names)\n");
		fprintf(stderr, "         -u       uncompressed BAM output\n");
		fprintf(stderr, "         -f       overwrite the output BAM if exist\n");
		fprintf(stderr, "         -1       compress level 1\n");
		fprintf(stderr, "         -l INT   compression level, from 0 to 9 [-1]\n");
#ifndef _PBGZF_USE 
		fprintf(stderr, "         -@ INT   number of BAM compression threads [0]\n");
#endif
		fprintf(stderr, "         -R STR   merge file in the specified region STR [all]\n");
		fprintf(stderr, "         -h FILE  copy the header in FILE to <out.bam> [in1.bam]\n\n");
		fprintf(stderr, "Note: Samtools' merge does not reconstruct the @RG dictionary in the header. Users\n");
		fprintf(stderr, "      must provide the correct header with -h, or uses Picard which properly maintains\n");
		fprintf(stderr, "      the header dictionary in merging.\n\n");
		return 1;
	}
	if (!(flag & MERGE_FORCE) && strcmp(argv[optind], "-")) {
		FILE *fp = fopen(argv[optind], "rb");
		if (fp != NULL) {
			fclose(fp);
			fprintf(stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
			return 1;
		}
	}
#ifndef _PBGZF_USE 
	if (bam_merge_core2(is_by_qname, argv[optind], fn_headers, argc - optind - 1, argv + optind + 1, flag, reg, n_threads, level) < 0) ret = 1;
#else
	if (bam_merge_core2(is_by_qname, argv[optind], fn_headers, argc - optind - 1, argv + optind + 1, flag, reg, level) < 0) ret = 1;
#endif
	free(reg);
	free(fn_headers);
	return ret;
}
コード例 #2
0
ファイル: bam_sort.c プロジェクト: chapmanb/samtools
int bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg)
{
#ifndef _PBGZF_USE 
	return bam_merge_core2(by_qname, out, headers, n, fn, flag, reg, 0, -1);
#else
	return bam_merge_core2(by_qname, out, headers, n, fn, flag, reg, -1);
#endif
}
コード例 #3
0
int bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg)
{
    char mode[12];
    strcpy(mode, "wb");
    if (flag & MERGE_UNCOMP) strcat(mode, "0");
    else if (flag & MERGE_LEVEL1) strcat(mode, "1");
    return bam_merge_core2(by_qname, out, mode, headers, n, fn, flag, reg, 0);
}
コード例 #4
0
ファイル: bam_sort.c プロジェクト: chapmanb/samtools
/*!
  @abstract Sort an unsorted BAM file based on the chromosome order
  and the leftmost position of an alignment

  @param  is_by_qname whether to sort by query name
  @param  fn       name of the file to be sorted
  @param  prefix   prefix of the output and the temporary files; upon
	                   sucessess, prefix.bam will be written.
  @param  max_mem  approxiate maximum memory (very inaccurate)

  @discussion It may create multiple temporary subalignment files
  and then merge them by calling bam_merge_core(). This function is
  NOT thread safe.
 */
void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int sort_type)
{
	int ret, i, n_files = 0;
	size_t mem, max_k, k, max_mem;
	bam_header_t *header;
	bamFile fp;
	bam1_t *b, **buf;
	char *fnout = 0;

	if (n_threads < 2) n_threads = 1;
	g_is_by_qname = is_by_qname;
	max_k = k = 0; mem = 0;
	max_mem = _max_mem * n_threads;
	buf = 0;
	fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
	if (fp == 0) {
		fprintf(stderr, "[bam_sort_core] fail to open file %s\n", fn);
		return;
	}
	header = bam_header_read(fp);
	if (is_by_qname) change_SO(header, "queryname");
	else change_SO(header, "coordinate");
	// write sub files
	for (;;) {
		if (k == max_k) {
			size_t old_max = max_k;
			max_k = max_k? max_k<<1 : 0x10000;
			buf = realloc(buf, max_k * sizeof(void*));
			memset(buf + old_max, 0, sizeof(void*) * (max_k - old_max));
		}
		if (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));
		b = buf[k];
		if ((ret = bam_read1(fp, b)) < 0) break;
		if (b->data_len < b->m_data>>2) { // shrink
			b->m_data = b->data_len;
			kroundup32(b->m_data);
			b->data = realloc(b->data, b->m_data);
		}
		mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays
		++k;
		if (mem >= max_mem) {
			n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
			mem = k = 0;
		}
	}
	if (ret != -1)
		fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n");
	// output file name
	fnout = calloc(strlen(prefix) + 20, 1);
	if (is_stdout) sprintf(fnout, "-");
	else sprintf(fnout, "%s.bam", prefix);
	// write the final output
	if (n_files == 0) { // a single block
		char mode[8];
		strcpy(mode, "w");
		if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9);
                sort_aux_core(k, buf, sort_type);
#ifndef _PBGZF_USE 
		write_buffer(fnout, mode, k, buf, header, n_threads);
#else
		write_buffer(fnout, mode, k, buf, header);
#endif
	} else { // then merge
		char **fns;
		n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
		fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files);
		fns = (char**)calloc(n_files, sizeof(char*));
		for (i = 0; i < n_files; ++i) {
			fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
			sprintf(fns[i], "%s.%.4d.bam", prefix, i);
		}
#ifndef _PBGZF_USE 
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level);
#else
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, level);
#endif
		for (i = 0; i < n_files; ++i) {
			unlink(fns[i]);
			free(fns[i]);
		}
		free(fns);
	}
	free(fnout);
	// free
	for (k = 0; k < max_k; ++k) {
		if (!buf[k]) continue;
		free(buf[k]->data);
		free(buf[k]);
	}
	free(buf);
	bam_header_destroy(header);
	bam_close(fp);
}
コード例 #5
0
int bam_merge(int argc, char *argv[])
{
    int c, is_by_qname = 0, flag = 0, ret = 0, n_threads = 0, level = -1;
    char *fn_headers = NULL, *reg = NULL, mode[12];
    long random_seed = (long)time(NULL);
    char** fn = NULL;
    int fn_size = 0;

    if (argc == 1) {
        merge_usage(stdout);
        return 0;
    }

    while ((c = getopt(argc, argv, "h:nru1R:f@:l:cps:b:")) >= 0) {
        switch (c) {
        case 'r': flag |= MERGE_RG; break;
        case 'f': flag |= MERGE_FORCE; break;
        case 'h': fn_headers = strdup(optarg); break;
        case 'n': is_by_qname = 1; break;
        case '1': flag |= MERGE_LEVEL1; level = 1; break;
        case 'u': flag |= MERGE_UNCOMP; level = 0; break;
        case 'R': reg = strdup(optarg); break;
        case 'l': level = atoi(optarg); break;
        case '@': n_threads = atoi(optarg); break;
        case 'c': flag |= MERGE_COMBINE_RG; break;
        case 'p': flag |= MERGE_COMBINE_PG; break;
        case 's': random_seed = atol(optarg); break;
        case 'b': {
            // load the list of files to read
            int nfiles;
            char **fn_read = hts_readlines(optarg, &nfiles);
            if (fn_read) {
                // Append to end of array
                fn = realloc(fn, (fn_size+nfiles) * sizeof(char*));
                if (fn == NULL) { ret = 1; goto end; }
                memcpy(fn+fn_size, fn_read, nfiles * sizeof(char*));
                fn_size += nfiles;
            }
            else {
                fprintf(pysamerr, "[%s] Invalid file list \"%s\"\n", __func__, optarg);
                ret = 1;
            }
            break;
        }
        }
    }
    if ( argc - optind < 1 ) {
        fprintf(pysamerr, "You must at least specify the output file.\n");
        merge_usage(pysamerr);
        return 1;
    }

    srand48(random_seed);
    if (!(flag & MERGE_FORCE) && strcmp(argv[optind], "-")) {
        FILE *fp = fopen(argv[optind], "rb");
        if (fp != NULL) {
            fclose(fp);
            fprintf(pysamerr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
            return 1;
        }
    }

    int nargcfiles = argc - (optind+1);
    if (nargcfiles > 0) {
        // Add argc files to end of array
        fn = realloc(fn, (fn_size+nargcfiles) * sizeof(char*));
        if (fn == NULL) { ret = 1; goto end; }
        memcpy(fn+fn_size, argv + (optind+1), nargcfiles * sizeof(char*));
    }
    if (fn_size+nargcfiles < 2) {
        fprintf(pysamerr, "You must specify at least 2 input files.\n");
        merge_usage(pysamerr);
        return 1;
    }
    strcpy(mode, "wb");
    if (level >= 0) sprintf(strchr(mode, '\0'), "%d", level < 9? level : 9);
    if (bam_merge_core2(is_by_qname, argv[optind], mode, fn_headers, fn_size+nargcfiles, fn, flag, reg, n_threads) < 0) ret = 1;
end:
    if (fn_size > 0) {
        int i;
        for (i=0; i<fn_size; i++) free(fn[i]);
        free(fn);
    }
    free(reg);
    free(fn_headers);
    return ret;
}
コード例 #6
0
/*!
  @abstract Sort an unsorted BAM file based on the chromosome order
  and the leftmost position of an alignment

  @param  is_by_qname whether to sort by query name
  @param  fn       name of the file to be sorted
  @param  prefix   prefix of the temporary files (prefix.NNNN.bam are written)
  @param  fnout    name of the final output file to be written
  @param  modeout  sam_open() mode to be used to create the final output file
  @param  max_mem  approxiate maximum memory (very inaccurate)
  @return 0 for successful sorting, negative on errors

  @discussion It may create multiple temporary subalignment files
  and then merge them by calling bam_merge_core(). This function is
  NOT thread safe.
 */
int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, const char *fnout, const char *modeout, size_t _max_mem, int n_threads)
{
    int ret, i, n_files = 0;
    size_t mem, max_k, k, max_mem;
    bam_hdr_t *header;
    samFile *fp;
    bam1_t *b, **buf;

    if (n_threads < 2) n_threads = 1;
    g_is_by_qname = is_by_qname;
    max_k = k = 0; mem = 0;
    max_mem = _max_mem * n_threads;
    buf = NULL;
    fp = sam_open(fn, "r");
    if (fp == NULL) {
        fprintf(pysamerr, "[bam_sort_core] fail to open file %s\n", fn);
        return -1;
    }
    header = sam_hdr_read(fp);
    if (is_by_qname) change_SO(header, "queryname");
    else change_SO(header, "coordinate");
    // write sub files
    for (;;) {
        if (k == max_k) {
            size_t kk, old_max = max_k;
            max_k = max_k? max_k<<1 : 0x10000;
            buf = (bam1_t**)realloc(buf, max_k * sizeof(bam1_t*));
            for (kk = old_max; kk < max_k; ++kk) buf[kk] = NULL;
        }
        if (buf[k] == NULL) buf[k] = bam_init1();
        b = buf[k];
        if ((ret = sam_read1(fp, header, b)) < 0) break;
        if (b->l_data < b->m_data>>2) { // shrink
            b->m_data = b->l_data;
            kroundup32(b->m_data);
            b->data = (uint8_t*)realloc(b->data, b->m_data);
        }
        mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays
        ++k;
        if (mem >= max_mem) {
            n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
            mem = k = 0;
        }
    }
    if (ret != -1)
        fprintf(pysamerr, "[bam_sort_core] truncated file. Continue anyway.\n");
    // write the final output
    if (n_files == 0) { // a single block
        ks_mergesort(sort, k, buf, 0);
        write_buffer(fnout, modeout, k, buf, header, n_threads);
    } else { // then merge
        char **fns;
        n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
        fprintf(pysamerr, "[bam_sort_core] merging from %d files...\n", n_files);
        fns = (char**)calloc(n_files, sizeof(char*));
        for (i = 0; i < n_files; ++i) {
            fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
            sprintf(fns[i], "%s.%.4d.bam", prefix, i);
        }
        if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns, MERGE_COMBINE_RG|MERGE_COMBINE_PG, NULL, n_threads) < 0) {
            // Propagate bam_merge_core2() failure; it has already emitted a
            // message explaining the failure, so no further message is needed.
            return -1;
        }
        for (i = 0; i < n_files; ++i) {
            unlink(fns[i]);
            free(fns[i]);
        }
        free(fns);
    }
    // free
    for (k = 0; k < max_k; ++k) bam_destroy1(buf[k]);
    free(buf);
    bam_hdr_destroy(header);
    sam_close(fp);
    return 0;
}
コード例 #7
0
ファイル: bam_sort.c プロジェクト: 05curranth/samtools
int bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg)
{
	return bam_merge_core2(by_qname, out, headers, n, fn, flag, reg, 0, -1);
}