/*
 * OCaml stub that forwards a cache size to bgzf_set_cache_size().
 *
 * bgzf: OCaml value wrapping the BGZF handle (unwrapped with BGZF_val).
 * sz:   OCaml integer holding the requested cache size.
 *
 * Always returns unit; the result of bgzf_set_cache_size() is discarded.
 */
value caml_bgzf_set_cache_size(value bgzf, value sz)
{
    CAMLparam2(bgzf, sz);
    /* sz arrives as a tagged OCaml integer and must be decoded with Int_val.
     * Val_int is the opposite conversion (C int -> OCaml value) and would
     * re-encode the already tagged value instead of decoding it. */
    bgzf_set_cache_size(BGZF_val(bgzf), Int_val(sz));
    CAMLreturn(Val_unit);
}
int main (int argc, char **argv) { ///////////////////// // Parse Arguments // ///////////////////// params *pars = new params; init_pars(pars); parse_cmd_args(argc, argv, pars); if( pars->version ) { printf("ngsF v%s\nCompiled on %s @ %s", version, __DATE__, __TIME__); #ifdef _USE_BGZF printf(" (BGZF library)\n"); #else printf(" (STD library)\n"); #endif exit(0); } if( pars->verbose >= 1 ) { printf("==> Input Arguments:\n"); printf("\tglf file: %s\n\tinit_values: %s\n\tfreq_fixed: %s\n\tout file: %s\n\tn_ind: %d\n\tn_sites: %lu\n\tchunk_size: %lu\n\tfast_lkl: %s\n\tapprox_EM: %s\n\tcall_geno: %s\n\tmax_iters: %d\n\tmin_epsilon: %.10f\n\tn_threads: %d\n\tseed: %lu\n\tquick: %s\n\tversion: %s\n\tverbose: %d\n\n", pars->in_glf, pars->init_values, pars->freq_fixed ? "true":"false", pars->out_file, pars->n_ind, pars->n_sites, pars->max_chunk_size, pars->fast_lkl ? "true":"false", pars->approx_EM ? "true":"false", pars->call_geno ? "true":"false", pars->max_iters, pars->min_epsilon, pars->n_threads, pars->seed, pars->quick ? "true":"false", version, pars->verbose); } if( pars->verbose > 4 ) printf("==> Verbose values greater than 4 for debugging purpose only. 
Expect large amounts of info on screen\n"); ///////////////////// // Check Arguments // ///////////////////// if(pars->in_glf == NULL) error(__FUNCTION__,"GL input file (-glf) missing!"); else if( strcmp(pars->in_glf, "-") == 0 ) { pars->in_glf_type = new char[6]; pars->in_glf_type = strcat(pars->in_glf_type, "STDIN"); } else { pars->in_glf_type = strrchr(pars->in_glf, '.'); if(pars->in_glf_type == NULL) error(__FUNCTION__,"invalid file type!"); } if(pars->out_file == NULL) error(__FUNCTION__,"output file (-out) missing!"); if(pars->n_ind == 0) error(__FUNCTION__,"number of individuals (-n_ind) missing!"); if(pars->n_sites == 0) error(__FUNCTION__,"number of sites (-n_sites) missing!"); /////////////////////// // Check input files // /////////////////////// // Get file total size struct stat st; stat(pars->in_glf, &st); if( strcmp(pars->in_glf_type, "STDIN") != 0 ) { if( pars->n_sites == st.st_size/sizeof(double)/pars->n_ind/3 && strcmp(pars->in_glf_type, ".glf") == 0 ) { if(pars->verbose >= 1) printf("==> UNCOMP input file (\"%s\"): number of sites (%lu) match expected file size\n", pars->in_glf_type, pars->n_sites); } else if( strcmp(pars->in_glf_type, ".glf") != 0 ) { if( pars->verbose >= 1) printf("==> COMPRESSED input file (\"%s\"): number of sites (%lu) do NOT match expected file size\n", pars->in_glf_type, pars->n_sites); } else error(__FUNCTION__,"wrong number of sites or invalid/corrupt file!"); } // Adjust max_chunk_size in case of fewer sites if(pars->max_chunk_size > pars->n_sites) { if( pars->verbose >= 1 ) printf("==> Fewer sites (%lu) than chunk_size (%lu). 
Reducing chunk size to match number of sites\n", pars->n_sites, pars->max_chunk_size); pars->max_chunk_size = pars->n_sites; } // Calculate total number of chunks pars->n_chunks = ceil( (double) pars->n_sites/ (double) pars->max_chunk_size ); if( pars->verbose >= 1 ) printf("==> Analysis will be run in %ld chunk(s)\n", pars->n_chunks); // Alocate memory for the chunk index pars->chunks_voffset = new int64_t[pars->n_chunks]; memset(pars->chunks_voffset, 0, pars->n_chunks*sizeof(int64_t)); // Adjust thread number to chunks if(pars->n_chunks < pars->n_threads) { if( pars->verbose >= 1 ) printf("==> Fewer chunks (%ld) than threads (%d). Reducing the number of threads to match number of chunks\n", pars->n_chunks, pars->n_threads); pars->n_threads = pars->n_chunks; } // Open input file #ifdef _USE_BGZF if( pars->verbose >= 1 ) printf("==> Using BGZF I/O library\n"); // Open BGZIP file if( strcmp(pars->in_glf_type, ".bgz") == 0 ) { if( (pars->in_glf_fh = bgzf_open(pars->in_glf, "rb")) < 0 ) error(__FUNCTION__,"Cannot open BGZIP file!"); } else error(__FUNCTION__,"BGZF library only supports BGZIP files!"); bgzf_set_cache_size(pars->in_glf_fh, CACHE_SIZE * 1024uL * 1024uL * 1024uL); #else if( pars->verbose >= 1 ) printf("==> Using native I/O library\n"); // Open GLF file if( strcmp(pars->in_glf_type, "STDIN") == 0 ) pars->in_glf_fh = stdin; else if( strcmp(pars->in_glf_type, ".glf") == 0 ) { if( (pars->in_glf_fh = fopen(pars->in_glf, "rb")) == NULL ) error(__FUNCTION__,"Cannot open GLF file!"); } else error(__FUNCTION__,"Standard library only supports UNCOMPRESSED GLF files!"); // Allocate memory and read from the file pars->data = new double* [pars->n_sites]; for(uint64_t s = 0; s < pars->n_sites; s++) { pars->data[s] = new double[pars->n_ind * 3]; if( fread (pars->data[s], sizeof(double), pars->n_ind * 3, pars->in_glf_fh) != pars->n_ind * 3) error(__FUNCTION__,"cannot read GLF file!"); if(pars->call_geno) call_geno(pars->data[s], pars->n_ind, 3); } #endif if( 
pars->in_glf_fh == NULL ) error(__FUNCTION__,"cannot open GLF file!"); /////////////////////////////////// // Declare variables for results // /////////////////////////////////// out_data *output = new out_data; output->site_freq = new double[pars->n_sites]; output->site_freq_num = new double[pars->n_sites]; output->site_freq_den = new double[pars->n_sites]; output->site_prob_var = new double[pars->n_sites]; output->site_tmpprob_var = new double[pars->n_sites]; output->indF = new double[pars->n_ind]; output->indF_num = new double[pars->n_ind]; output->indF_den = new double[pars->n_ind]; output->ind_lkl = new double[pars->n_ind]; // Initialize output init_output(pars, output); ////////////////// // Analyze Data // ////////////////// if( pars->verbose >= 1 && !pars->fast_lkl && strcmp("e", pars->init_values) != 0 ) { printf("==> Initial LogLkl: %.15f\n", full_HWE_like(pars, output->site_freq, output->indF, 0, pars->n_ind)); fflush(stdout); } do_EM(pars, output); if( pars->verbose >= 1 ) printf("\nFinal logLkl: %f\n", output->global_lkl); ////////////////// // Print Output // ////////////////// FILE *out_file; if( pars->verbose >= 1 ) printf("Printing Output...\n"); out_file = fopen(pars->out_file, "w"); if(out_file == NULL) error(__FUNCTION__,"Cannot open OUTPUT file!"); for(uint16_t i = 0; i < pars->n_ind; i++) fprintf(out_file,"%f\n", output->indF[i]); fclose(out_file); ////////////////////// // Close Input File // ////////////////////// if( pars->verbose >= 1 ) printf("Exiting...\n"); #ifdef _USE_BGZF bgzf_close(pars->in_glf_fh); #else for(uint64_t s = 0; s < pars->n_sites; s++) delete [] pars->data[s]; delete [] pars->data; fclose(pars->in_glf_fh); #endif ///////////////// // Free Memory // ///////////////// delete [] output->site_freq; delete [] output->site_freq_num; delete [] output->site_freq_den; delete [] output->site_prob_var; delete [] output->indF; delete [] output->indF_num; delete [] output->indF_den; delete [] output->ind_lkl; delete output; //if( 
strcmp("e", pars->init_values) == 0 ) //delete [] pars->init_values; delete [] pars->chunks_voffset; delete pars; return 0; }
int main_bedcov(int argc, char *argv[]) { extern void bam_init_header_hash(bam_header_t*); gzFile fp; kstring_t str; kstream_t *ks; bam_index_t **idx; bam_header_t *h = 0; aux_t **aux; int *n_plp, dret, i, n, c, min_mapQ = 0; int64_t *cnt; const bam_pileup1_t **plp; while ((c = getopt(argc, argv, "Q:")) >= 0) { switch (c) { case 'Q': min_mapQ = atoi(optarg); break; } } if (optind + 2 > argc) { fprintf(stderr, "Usage: samtools bedcov <in.bed> <in1.bam> [...]\n"); return 1; } memset(&str, 0, sizeof(kstring_t)); n = argc - optind - 1; aux = calloc(n, sizeof(aux_t*)); idx = calloc(n, sizeof(bam_index_t*)); for (i = 0; i < n; ++i) { aux[i] = calloc(1, sizeof(aux_t)); aux[i]->min_mapQ = min_mapQ; aux[i]->fp = bam_open(argv[i+optind+1], "r"); idx[i] = bam_index_load(argv[i+optind+1]); if (aux[i]->fp == 0 || idx[i] == 0) { fprintf(stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]); return 2; } bgzf_set_cache_size(aux[i]->fp, 20); if (i == 0) h = bam_header_read(aux[0]->fp); } bam_init_header_hash(h); cnt = calloc(n, 8); fp = gzopen(argv[optind], "rb"); ks = ks_init(fp); n_plp = calloc(n, sizeof(int)); plp = calloc(n, sizeof(bam_pileup1_t*)); while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) { char *p, *q; int tid, beg, end, pos; bam_mplp_t mplp; for (p = q = str.s; *p && *p != '\t'; ++p); if (*p != '\t') goto bed_error; *p = 0; tid = bam_get_tid(h, q); *p = '\t'; if (tid < 0) goto bed_error; for (q = p = p + 1; isdigit(*p); ++p); if (*p != '\t') goto bed_error; *p = 0; beg = atoi(q); *p = '\t'; for (q = p = p + 1; isdigit(*p); ++p); if (*p == '\t' || *p == 0) { int c = *p; *p = 0; end = atoi(q); *p = c; } else goto bed_error; for (i = 0; i < n; ++i) { if (aux[i]->iter) bam_iter_destroy(aux[i]->iter); aux[i]->iter = bam_iter_query(idx[i], tid, beg, end); } mplp = bam_mplp_init(n, read_bam, (void**)aux); bam_mplp_set_maxcnt(mplp, 64000); memset(cnt, 0, 8 * n); while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) if (pos >= beg && pos < end) for (i 
= 0; i < n; ++i) cnt[i] += n_plp[i]; for (i = 0; i < n; ++i) { kputc('\t', &str); kputl(cnt[i], &str); } puts(str.s); bam_mplp_destroy(mplp); continue; bed_error: fprintf(stderr, "Errors in BED line '%s'\n", str.s); } free(n_plp); free(plp); ks_destroy(ks); gzclose(fp); free(cnt); for (i = 0; i < n; ++i) { if (aux[i]->iter) bam_iter_destroy(aux[i]->iter); bam_index_destroy(idx[i]); bam_close(aux[i]->fp); free(aux[i]); } bam_header_destroy(h); free(aux); free(idx); free(str.s); return 0; }