コード例 #1
0
ファイル: bgzf_stubs.c プロジェクト: mlin/ocaml-bgzf
value caml_bgzf_set_cache_size(value bgzf, value sz) {
	CAMLparam2(bgzf,sz);
	bgzf_set_cache_size(BGZF_val(bgzf),Val_int(sz));
	CAMLreturn(Val_unit);
}
コード例 #2
0
ファイル: ngsF.cpp プロジェクト: mojaveazure/ngsF
int main (int argc, char **argv) {
    /////////////////////
    // Parse Arguments //
    /////////////////////
    params *pars = new params;
    init_pars(pars);
    parse_cmd_args(argc, argv, pars);
    if( pars->version ) {
        printf("ngsF v%s\nCompiled on %s @ %s", version, __DATE__, __TIME__);
#ifdef _USE_BGZF
        printf(" (BGZF library)\n");
#else
        printf(" (STD library)\n");
#endif

        exit(0);
    }
    if( pars->verbose >= 1 ) {
        printf("==> Input Arguments:\n");
        printf("\tglf file: %s\n\tinit_values: %s\n\tfreq_fixed: %s\n\tout file: %s\n\tn_ind: %d\n\tn_sites: %lu\n\tchunk_size: %lu\n\tfast_lkl: %s\n\tapprox_EM: %s\n\tcall_geno: %s\n\tmax_iters: %d\n\tmin_epsilon: %.10f\n\tn_threads: %d\n\tseed: %lu\n\tquick: %s\n\tversion: %s\n\tverbose: %d\n\n",
               pars->in_glf, pars->init_values, pars->freq_fixed ? "true":"false", pars->out_file, pars->n_ind, pars->n_sites, pars->max_chunk_size, pars->fast_lkl ? "true":"false", pars->approx_EM ? "true":"false", pars->call_geno ? "true":"false", pars->max_iters, pars->min_epsilon, pars->n_threads, pars->seed, pars->quick ? "true":"false", version, pars->verbose);
    }
    if( pars->verbose > 4 ) printf("==> Verbose values greater than 4 for debugging purpose only. Expect large amounts of info on screen\n");



    /////////////////////
    // Check Arguments //
    /////////////////////
    if(pars->in_glf == NULL)
        error(__FUNCTION__,"GL input file (-glf) missing!");
    else if( strcmp(pars->in_glf, "-") == 0 ) {
        pars->in_glf_type = new char[6];
        pars->in_glf_type = strcat(pars->in_glf_type, "STDIN");
    } else {
        pars->in_glf_type = strrchr(pars->in_glf, '.');
        if(pars->in_glf_type == NULL)
            error(__FUNCTION__,"invalid file type!");
    }
    if(pars->out_file == NULL)
        error(__FUNCTION__,"output file (-out) missing!");
    if(pars->n_ind == 0)
        error(__FUNCTION__,"number of individuals (-n_ind) missing!");
    if(pars->n_sites == 0)
        error(__FUNCTION__,"number of sites (-n_sites) missing!");



    ///////////////////////
    // Check input files //
    ///////////////////////
    // Get file total size
    struct stat st;
    stat(pars->in_glf, &st);
    if( strcmp(pars->in_glf_type, "STDIN") != 0 ) {
        if( pars->n_sites == st.st_size/sizeof(double)/pars->n_ind/3 && strcmp(pars->in_glf_type, ".glf") == 0 ) {
            if(pars->verbose >= 1)
                printf("==> UNCOMP input file (\"%s\"): number of sites (%lu) match expected file size\n", pars->in_glf_type, pars->n_sites);
        } else if( strcmp(pars->in_glf_type, ".glf") != 0 ) {
            if( pars->verbose >= 1)
                printf("==> COMPRESSED input file (\"%s\"): number of sites (%lu) do NOT match expected file size\n", pars->in_glf_type, pars->n_sites);
        } else
            error(__FUNCTION__,"wrong number of sites or invalid/corrupt file!");
    }


    // Adjust max_chunk_size in case of fewer sites
    if(pars->max_chunk_size > pars->n_sites) {
        if( pars->verbose >= 1 ) printf("==> Fewer sites (%lu) than chunk_size (%lu). Reducing chunk size to match number of sites\n", pars->n_sites, pars->max_chunk_size);
        pars->max_chunk_size = pars->n_sites;
    }
    // Calculate total number of chunks
    pars->n_chunks = ceil( (double) pars->n_sites/ (double) pars->max_chunk_size );
    if( pars->verbose >= 1 ) printf("==> Analysis will be run in %ld chunk(s)\n", pars->n_chunks);
    // Alocate memory for the chunk index
    pars->chunks_voffset = new int64_t[pars->n_chunks];
    memset(pars->chunks_voffset, 0, pars->n_chunks*sizeof(int64_t));
    // Adjust thread number to chunks
    if(pars->n_chunks < pars->n_threads) {
        if( pars->verbose >= 1 ) printf("==> Fewer chunks (%ld) than threads (%d). Reducing the number of threads to match number of chunks\n", pars->n_chunks, pars->n_threads);
        pars->n_threads = pars->n_chunks;
    }


    // Open input file
#ifdef _USE_BGZF
    if( pars->verbose >= 1 ) printf("==> Using BGZF I/O library\n");
    // Open BGZIP file
    if( strcmp(pars->in_glf_type, ".bgz") == 0 ) {
        if( (pars->in_glf_fh = bgzf_open(pars->in_glf, "rb")) < 0 )
            error(__FUNCTION__,"Cannot open BGZIP file!");
    } else
        error(__FUNCTION__,"BGZF library only supports BGZIP files!");

    bgzf_set_cache_size(pars->in_glf_fh, CACHE_SIZE * 1024uL * 1024uL * 1024uL);
#else

    if( pars->verbose >= 1 ) printf("==> Using native I/O library\n");
    // Open GLF file
    if( strcmp(pars->in_glf_type, "STDIN") == 0 )
        pars->in_glf_fh = stdin;
    else if( strcmp(pars->in_glf_type, ".glf") == 0 ) {
        if( (pars->in_glf_fh = fopen(pars->in_glf, "rb")) == NULL )
            error(__FUNCTION__,"Cannot open GLF file!");
    } else
        error(__FUNCTION__,"Standard library only supports UNCOMPRESSED GLF files!");

    // Allocate memory and read from the file
    pars->data = new double* [pars->n_sites];
    for(uint64_t s = 0; s < pars->n_sites; s++) {
        pars->data[s] = new double[pars->n_ind * 3];
        if( fread (pars->data[s], sizeof(double), pars->n_ind * 3, pars->in_glf_fh) != pars->n_ind * 3)
            error(__FUNCTION__,"cannot read GLF file!");
        if(pars->call_geno)
            call_geno(pars->data[s], pars->n_ind, 3);
    }
#endif
    if( pars->in_glf_fh == NULL )
        error(__FUNCTION__,"cannot open GLF file!");



    ///////////////////////////////////
    // Declare variables for results //
    ///////////////////////////////////
    out_data *output = new out_data;
    output->site_freq = new double[pars->n_sites];
    output->site_freq_num = new double[pars->n_sites];
    output->site_freq_den = new double[pars->n_sites];
    output->site_prob_var = new double[pars->n_sites];
    output->site_tmpprob_var = new double[pars->n_sites];
    output->indF = new double[pars->n_ind];
    output->indF_num = new double[pars->n_ind];
    output->indF_den = new double[pars->n_ind];
    output->ind_lkl = new double[pars->n_ind];
    // Initialize output
    init_output(pars, output);



    //////////////////
    // Analyze Data //
    //////////////////
    if( pars->verbose >= 1 && !pars->fast_lkl && strcmp("e", pars->init_values) != 0 ) {
        printf("==> Initial LogLkl: %.15f\n", full_HWE_like(pars, output->site_freq, output->indF, 0, pars->n_ind));
        fflush(stdout);
    }
    do_EM(pars, output);
    if( pars->verbose >= 1 ) printf("\nFinal logLkl: %f\n", output->global_lkl);



    //////////////////
    // Print Output //
    //////////////////
    FILE *out_file;
    if( pars->verbose >= 1 ) printf("Printing Output...\n");

    out_file = fopen(pars->out_file, "w");
    if(out_file == NULL)
        error(__FUNCTION__,"Cannot open OUTPUT file!");
    for(uint16_t i = 0; i < pars->n_ind; i++)
        fprintf(out_file,"%f\n", output->indF[i]);
    fclose(out_file);



    //////////////////////
    // Close Input File //
    //////////////////////
    if( pars->verbose >= 1 ) printf("Exiting...\n");
#ifdef _USE_BGZF
    bgzf_close(pars->in_glf_fh);
#else
    for(uint64_t s = 0; s < pars->n_sites; s++)
        delete [] pars->data[s];
    delete [] pars->data;
    fclose(pars->in_glf_fh);
#endif



    /////////////////
    // Free Memory //
    /////////////////
    delete [] output->site_freq;
    delete [] output->site_freq_num;
    delete [] output->site_freq_den;
    delete [] output->site_prob_var;
    delete [] output->indF;
    delete [] output->indF_num;
    delete [] output->indF_den;
    delete [] output->ind_lkl;
    delete output;
    //if( strcmp("e", pars->init_values) == 0 )
    //delete [] pars->init_values;
    delete [] pars->chunks_voffset;
    delete pars;

    return 0;
}
コード例 #3
0
ファイル: bedcov.c プロジェクト: leecbaker/samtools
int main_bedcov(int argc, char *argv[])
{
	extern void bam_init_header_hash(bam_header_t*);
	gzFile fp;
	kstring_t str;
	kstream_t *ks;
	bam_index_t **idx;
	bam_header_t *h = 0;
	aux_t **aux;
	int *n_plp, dret, i, n, c, min_mapQ = 0;
	int64_t *cnt;
	const bam_pileup1_t **plp;

	while ((c = getopt(argc, argv, "Q:")) >= 0) {
		switch (c) {
		case 'Q': min_mapQ = atoi(optarg); break;
		}
	}
	if (optind + 2 > argc) {
		fprintf(stderr, "Usage: samtools bedcov <in.bed> <in1.bam> [...]\n");
		return 1;
	}
	memset(&str, 0, sizeof(kstring_t));
	n = argc - optind - 1;
	aux = calloc(n, sizeof(aux_t*));
	idx = calloc(n, sizeof(bam_index_t*));
	for (i = 0; i < n; ++i) {
		aux[i] = calloc(1, sizeof(aux_t));
		aux[i]->min_mapQ = min_mapQ;
		aux[i]->fp = bam_open(argv[i+optind+1], "r");
		idx[i] = bam_index_load(argv[i+optind+1]);
		if (aux[i]->fp == 0 || idx[i] == 0) {
			fprintf(stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
			return 2;
		}
		bgzf_set_cache_size(aux[i]->fp, 20);
		if (i == 0) h = bam_header_read(aux[0]->fp);
	}
	bam_init_header_hash(h);
	cnt = calloc(n, 8);

	fp = gzopen(argv[optind], "rb");
	ks = ks_init(fp);
	n_plp = calloc(n, sizeof(int));
	plp = calloc(n, sizeof(bam_pileup1_t*));
	while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
		char *p, *q;
		int tid, beg, end, pos;
		bam_mplp_t mplp;

		for (p = q = str.s; *p && *p != '\t'; ++p);
		if (*p != '\t') goto bed_error;
		*p = 0; tid = bam_get_tid(h, q); *p = '\t';
		if (tid < 0) goto bed_error;
		for (q = p = p + 1; isdigit(*p); ++p);
		if (*p != '\t') goto bed_error;
		*p = 0; beg = atoi(q); *p = '\t';
		for (q = p = p + 1; isdigit(*p); ++p);
		if (*p == '\t' || *p == 0) {
			int c = *p;
			*p = 0; end = atoi(q); *p = c;
		} else goto bed_error;

		for (i = 0; i < n; ++i) {
			if (aux[i]->iter) bam_iter_destroy(aux[i]->iter);
			aux[i]->iter = bam_iter_query(idx[i], tid, beg, end);
		}
		mplp = bam_mplp_init(n, read_bam, (void**)aux);
		bam_mplp_set_maxcnt(mplp, 64000);
		memset(cnt, 0, 8 * n);
		while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0)
			if (pos >= beg && pos < end)
				for (i = 0; i < n; ++i) cnt[i] += n_plp[i];
		for (i = 0; i < n; ++i) {
			kputc('\t', &str);
			kputl(cnt[i], &str);
		}
		puts(str.s);
		bam_mplp_destroy(mplp);
		continue;

bed_error:
		fprintf(stderr, "Errors in BED line '%s'\n", str.s);
	}
	free(n_plp); free(plp);
	ks_destroy(ks);
	gzclose(fp);

	free(cnt);
	for (i = 0; i < n; ++i) {
		if (aux[i]->iter) bam_iter_destroy(aux[i]->iter);
		bam_index_destroy(idx[i]);
		bam_close(aux[i]->fp);
		free(aux[i]);
	}
	bam_header_destroy(h);
	free(aux); free(idx);
	free(str.s);
	return 0;
}