Esempio n. 1
0
/**
 * Constructor.
 */
VNTRAnnotator::VNTRAnnotator(std::string& ref_fasta_file, bool debug)
{
    vm = new VariantManip(ref_fasta_file.c_str());

    float delta = 0.0001;
    float epsilon = 0.05;
    float tau = 0.01;
    float eta = 0.01;
    float mismatch_penalty = 3;

    ahmm = new AHMM(false);
    ahmm->set_delta(delta);
    ahmm->set_epsilon(epsilon);
    ahmm->set_tau(tau);
    ahmm->set_eta(eta);
    ahmm->set_mismatch_penalty(mismatch_penalty);
    ahmm->initialize_T();

    fai = fai_load(ref_fasta_file.c_str());
    if (fai==NULL)
    {
        fprintf(stderr, "[%s:%d %s] Cannot load genome index: %s\n", __FILE__, __LINE__, __FUNCTION__, ref_fasta_file.c_str());
        exit(1);
    }

    cre = new CandidateRegionExtractor(ref_fasta_file, debug);
    cmp = new CandidateMotifPicker(debug);
    fd = new FlankDetector(ref_fasta_file, debug);
    
    this->debug = debug;
    qual.assign(256, 'K');
};
Esempio n. 2
0
        params(int8_t k, char *bedpath, char *refpath, int padding=DEFAULT_PADDING):
        k(k),
        fai(fai_load(refpath)),
        bed(bedpath, fai, padding)
        {

        }
Esempio n. 3
0
int faidx_main(int argc, char *argv[])
{
	if (argc == 1) {
		fprintf(stderr, "Usage: faidx <in.fasta> [<reg> [...]]\n");
		return 1;
	} else {
		if (argc == 2) fai_build(argv[1]);
		else {
			int i, j, k, l;
			char *s;
			faidx_t *fai;
			fai = fai_load(argv[1]);
			if (fai == 0) return 1;
			for (i = 2; i != argc; ++i) {
				printf(">%s\n", argv[i]);
				s = fai_fetch(fai, argv[i], &l);
				for (j = 0; j < l; j += 60) {
					for (k = 0; k < 60 && k < l - j; ++k)
						putchar(s[j + k]);
					putchar('\n');
				}
				free(s);
			}
			fai_destroy(fai);
		}
	}
	return 0;
}
Esempio n. 4
0
/* check match between reference and bam files. prints an error
 * message and return non-zero on mismatch 
*/
int checkref(char *fasta_file, char *bam_file)
{
     int i = -1;
     bam_header_t *header;
     faidx_t *fai;
     char *ref;
     int ref_len = -1;
     bamFile bam_fp;
     
     if (! file_exists(fasta_file)) {
          LOG_FATAL("Fsata file %s does not exist. Exiting...\n", fasta_file);
          return 1;
     }     

     if (0 != strcmp(bam_file, "-")  && ! file_exists(bam_file)) {
          LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_file);
          return 1;
     }     

     bam_fp = strcmp(bam_file, "-") == 0 ? bam_dopen(fileno(stdin), "r") : bam_open(bam_file, "r");
     header = bam_header_read(bam_fp);
     if (!header) {
          LOG_FATAL("Failed to read BAM header from %s\n", bam_file);
          return 1;
     }
     
     fai = fai_load(fasta_file);
     if (!fai) {
          LOG_FATAL("Failed to fasta index for %s\n", fasta_file);
          return 1;
     }
     
     for (i=0; i < header->n_targets; i++) {
          LOG_DEBUG("BAM header target %d of %d: name=%s len=%d\n", 
                    i+1, header->n_targets, header->target_name[i], header->target_len[i]);
          
          ref = faidx_fetch_seq(fai, header->target_name[i], 
                                0, 0x7fffffff, &ref_len);
          if (NULL == ref) {
               LOG_FATAL("Failed to fetch sequence %s from fasta file\n", header->target_name[i]);
               return -1;
          }
          if (header->target_len[i] != ref_len) {
               LOG_FATAL("Sequence length mismatch for sequence %s (%dbp in fasta; %dbp in bam)\n", 
                         header->target_name[i], header->target_len[i], ref_len);
               return -1;
          }
          free(ref);
     }
     
     fai_destroy(fai);
     bam_header_destroy(header);
     bam_close(bam_fp);

     return 0;
}
Esempio n. 5
0
bool IndexedFastaReader::Open(const std::string &filename)
{
    faidx_t* handle = fai_load(filename.c_str());
    if (handle == nullptr)
        return false;
    else
    {
        filename_ = filename;
        handle_ = handle;
        return true;
    }
}
Esempio n. 6
0
/**
 * Constructor.
 */
FlankDetector::FlankDetector(std::string& ref_fasta_file, bool debug)
{
    //////////////////////
    //initialize variables
    //////////////////////
    this->debug = debug;

    ///////////////////
    //initialize raHMMs
    ///////////////////
    float delta = 0.0001;
    float epsilon = 0.0005;
    float tau = 0.01;
    float eta = 0.01;
    float mismatch_penalty = 3;

    ahmm = new AHMM(false);
    ahmm->set_delta(delta);
    ahmm->set_epsilon(epsilon);
    ahmm->set_tau(tau);
    ahmm->set_eta(eta);
    ahmm->set_mismatch_penalty(mismatch_penalty);
    ahmm->initialize_T();

    lfhmm = new LFHMM(false);
    lfhmm->set_delta(delta);
    lfhmm->set_epsilon(epsilon);
    lfhmm->set_tau(tau);
    lfhmm->set_eta(eta);
    lfhmm->set_mismatch_penalty(mismatch_penalty);
    lfhmm->initialize_T();

    rfhmm = new RFHMM(false);
    rfhmm->set_delta(delta);
    rfhmm->set_epsilon(epsilon);
    rfhmm->set_tau(tau);
    rfhmm->set_eta(eta);
    rfhmm->set_mismatch_penalty(mismatch_penalty);
    rfhmm->initialize_T();

    qual.assign(1000, 'K');

    //////////////////
    //initialize tools
    //////////////////
    fai = fai_load(ref_fasta_file.c_str());
    if (fai==NULL)
    {
        fprintf(stderr, "[%s:%d %s] Cannot load genome index: %s\n", __FILE__, __LINE__, __FUNCTION__, ref_fasta_file.c_str());
        exit(1);
    }
};
Esempio n. 7
0
/**
 * Constructor.
 *
 * @ref_fasta_file reference sequence FASTA file.
 */
VariantManip::VariantManip(std::string ref_fasta_file)
{
    if (ref_fasta_file!="")
    {
        fai = fai_load(ref_fasta_file.c_str());
        if (fai==NULL)
        {
            fprintf(stderr, "[%s:%d %s] Cannot load genome index: %s\n", __FILE__, __LINE__, __FUNCTION__, ref_fasta_file.c_str());
            exit(1);
        }
        reference_present = (fai!=NULL);
    }
};
/**
 * Constructor.
 */
CandidateRegionExtractor::CandidateRegionExtractor(std::string& ref_fasta_file, bool debug)
{
    vm = new VariantManip(ref_fasta_file.c_str());
    fai = fai_load(ref_fasta_file.c_str());
    if (fai==NULL)
    {
        fprintf(stderr, "[%s:%d %s] Cannot load genome index: %s\n", __FILE__, __LINE__, __FUNCTION__, ref_fasta_file.c_str());
        exit(1);
    }

    max_mlen = 10;
    
   this->debug = debug;
    
    mt = new MotifTree(max_mlen, debug);
};
Esempio n. 9
0
int add_dindel(const char *bam_in, const char *bam_out, const char *ref)
{
	data_t_dindel tmp;
    int count = 0;
    bam1_t *b = NULL;

	if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) {
         LOG_FATAL("Failed to open BAM file %s\n", bam_in);
             return 1;
        }
    if ((tmp.fai = fai_load(ref)) == 0) {
         LOG_FATAL("Failed to open reference file %s\n", ref);
         return 1;
    }
    /*warn_old_fai(ref);*/

    if (!bam_out || bam_out[0] == '-') {
         tmp.out = bam_dopen(fileno(stdout), "w");
    } else {
         tmp.out = bam_open(bam_out, "w");
    }
    bam_header_write(tmp.out, tmp.in->header);
    
    b = bam_init1();
    tmp.tid = -1;
    tmp.hpcount = 0;
    tmp.rlen = 0;
    while (samread(tmp.in, b) >= 0) {
         count++;
         dindel_fetch_func(b, &tmp); 
    }
    bam_destroy1(b);
    
    if (tmp.hpcount) free(tmp.hpcount);
    samclose(tmp.in);
    bam_close(tmp.out);
    fai_destroy(tmp.fai);
	LOG_VERBOSE("Processed %d reads\n", count);
	return 0;
}
Esempio n. 10
0
File: sam.c Progetto: atks/vt
static void faidx1(const char *filename)
{
    int n, n_exp = 0, n_fq_exp = 0;
    char tmpfilename[FILENAME_MAX], line[500];
    FILE *fin, *fout;
    faidx_t *fai;

    fin = fopen(filename, "rb");
    if (fin == NULL) fail("can't open %s\n", filename);
    sprintf(tmpfilename, "%s.tmp", filename);
    fout = fopen(tmpfilename, "wb");
    if (fout == NULL) fail("can't create temporary %s\n", tmpfilename);
    while (fgets(line, sizeof line, fin)) {
        if (line[0] == '>') n_exp++;
        if (line[0] == '+' && line[1] == '\n') n_fq_exp++;
        fputs(line, fout);
    }
    fclose(fin);
    fclose(fout);

    if (n_exp == 0 && n_fq_exp != 0) {
        // probably a fastq file
        n_exp = n_fq_exp;
    }

    if (fai_build(tmpfilename) < 0) fail("can't index %s", tmpfilename);
    fai = fai_load(tmpfilename);
    if (fai == NULL) { fail("can't load faidx file %s", tmpfilename); return; }

    n = faidx_fetch_nseq(fai);
    if (n != n_exp)
        fail("%s: faidx_fetch_nseq returned %d, expected %d", filename, n, n_exp);

    n = faidx_nseq(fai);
    if (n != n_exp)
        fail("%s: faidx_nseq returned %d, expected %d", filename, n, n_exp);

    fai_destroy(fai);
}
Esempio n. 11
0
int ctx_vcfcov(int argc, char **argv)
{
  struct MemArgs memargs = MEM_ARGS_INIT;
  const char *out_path = NULL, *out_type = NULL;

  uint32_t max_allele_len = 0, max_gt_vars = 0;
  char *ref_path = NULL;
  bool low_mem = false;

  // Arg parsing
  char cmd[100];
  char shortopts[300];
  cmd_long_opts_to_short(longopts, shortopts, sizeof(shortopts));
  int c;
  size_t i;

  // silence error messages from getopt_long
  // opterr = 0;

  while((c = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
    cmd_get_longopt_str(longopts, c, cmd, sizeof(cmd));
    switch(c) {
      case 0: /* flag set */ break;
      case 'h': cmd_print_usage(NULL); break;
      case 'o': cmd_check(!out_path, cmd); out_path = optarg; break;
      case 'O': cmd_check(!out_type, cmd); out_type = optarg; break;
      case 'f': cmd_check(!futil_get_force(), cmd); futil_set_force(true); break;
      case 'm': cmd_mem_args_set_memory(&memargs, optarg); break;
      case 'n': cmd_mem_args_set_nkmers(&memargs, optarg); break;
      case 'r': cmd_check(!ref_path, cmd); ref_path = optarg; break;
      case 'L': cmd_check(!max_allele_len,cmd); max_allele_len = cmd_uint32(cmd,optarg); break;
      case 'N': cmd_check(!max_gt_vars,cmd); max_gt_vars = cmd_uint32(cmd,optarg); break;
      case 'M': cmd_check(!low_mem, cmd); low_mem = true; break;
      case ':': /* BADARG */
      case '?': /* BADCH getopt_long has already printed error */
        // cmd_print_usage(NULL);
        die("`"CMD" "SUBCMD" -h` for help. Bad option: %s", argv[optind-1]);
      default: abort();
    }
  }

  // Defaults for unset values
  if(out_path == NULL) out_path = "-";
  if(ref_path == NULL) cmd_print_usage("Require a reference (-r,--ref <ref.fa>)");
  if(optind+2 > argc) cmd_print_usage("Require VCF and graph files");

  if(!max_allele_len) max_allele_len = DEFAULT_MAX_ALLELE_LEN;
  if(!max_gt_vars) max_gt_vars = DEFAULT_MAX_GT_VARS;

  status("[vcfcov] max allele length: %u; max number of variants: %u",
         max_allele_len, max_gt_vars);

  // open ref
  // index fasta with: samtools faidx ref.fa
  faidx_t *fai = fai_load(ref_path);
  if(fai == NULL) die("Cannot load ref index: %s / %s.fai", ref_path, ref_path);

  // Open input VCF file
  const char *vcf_path = argv[optind++];
  htsFile *vcffh = hts_open(vcf_path, "r");
  if(vcffh == NULL) die("Cannot open VCF file: %s", vcf_path);
  bcf_hdr_t *vcfhdr = bcf_hdr_read(vcffh);
  if(vcfhdr == NULL) die("Cannot read VCF header: %s", vcf_path);

  // Test we can close and reopen files
  if(low_mem) {
    if((vcffh = hts_open(vcf_path, "r")) == NULL)
      die("Cannot re-open VCF file: %s", vcf_path);
    if((vcfhdr = bcf_hdr_read(vcffh)) == NULL)
      die("Cannot re-read VCF header: %s", vcf_path);
  }

  //
  // Open graph files
  //
  const size_t num_gfiles = argc - optind;
  char **graph_paths = argv + optind;
  ctx_assert(num_gfiles > 0);

  GraphFileReader *gfiles = ctx_calloc(num_gfiles, sizeof(GraphFileReader));
  size_t ncols, ctx_max_kmers = 0, ctx_sum_kmers = 0;

  ncols = graph_files_open(graph_paths, gfiles, num_gfiles,
                           &ctx_max_kmers, &ctx_sum_kmers);

  // Check graph + paths are compatible
  graphs_gpaths_compatible(gfiles, num_gfiles, NULL, 0, -1);

  //
  // Decide on memory
  //
  size_t bits_per_kmer, kmers_in_hash, graph_mem;

  bits_per_kmer = sizeof(BinaryKmer)*8 + sizeof(Covg)*8 * ncols;
  kmers_in_hash = cmd_get_kmers_in_hash(memargs.mem_to_use,
                                        memargs.mem_to_use_set,
                                        memargs.num_kmers,
                                        memargs.num_kmers_set,
                                        bits_per_kmer,
                                        low_mem ? -1 : (int64_t)ctx_max_kmers,
                                        ctx_sum_kmers,
                                        true, &graph_mem);

  cmd_check_mem_limit(memargs.mem_to_use, graph_mem);

  //
  // Open output file
  //
  // v=>vcf, z=>compressed vcf, b=>bcf, bu=>uncompressed bcf
  int mode = vcf_misc_get_outtype(out_type, out_path);
  futil_create_output(out_path);
  htsFile *outfh = hts_open(out_path, modes_htslib[mode]);
  status("[vcfcov] Output format: %s", hsmodes_htslib[mode]);


  // Allocate memory
  dBGraph db_graph;
  db_graph_alloc(&db_graph, gfiles[0].hdr.kmer_size, ncols, 1, kmers_in_hash,
                 DBG_ALLOC_COVGS);

  //
  // Set up tag names
  //

  // *R => ref, *A => alt
  sprintf(kcov_ref_tag, "K%zuR", db_graph.kmer_size); // mean coverage
  sprintf(kcov_alt_tag, "K%zuA", db_graph.kmer_size);

  // #SAMPLE=<ID=...,K29KCOV=...,K29NK=...,K29RLK>
  // - K29_kcov is empirical kmer coverage
  // - K29_nkmers is the number of kmers in the sample
  // - mean_read_length is the mean read length in bases
  char sample_kcov_tag[20], sample_nk_tag[20], sample_rlk_tag[20];
  sprintf(sample_kcov_tag, "K%zu_kcov", db_graph.kmer_size); // mean coverage
  sprintf(sample_nk_tag, "K%zu_nkmers", db_graph.kmer_size);
  sprintf(sample_rlk_tag, "mean_read_length");

  //
  // Load kmers if we are using --low-mem
  //

  VcfCovStats st;
  memset(&st, 0, sizeof(st));
  VcfCovPrefs prefs = {.kcov_ref_tag = kcov_ref_tag,
                       .kcov_alt_tag = kcov_alt_tag,
                       .max_allele_len = max_allele_len,
                       .max_gt_vars = max_gt_vars,
                       .load_kmers_only = false};

  if(low_mem)
  {
    status("[vcfcov] Loading kmers from VCF+ref");

    prefs.load_kmers_only = true;
    vcfcov_file(vcffh, vcfhdr, NULL, NULL, vcf_path, fai,
                NULL, &prefs, &st, &db_graph);

    // Close files
    hts_close(vcffh);
    bcf_hdr_destroy(vcfhdr);

    // Re-open files
    if((vcffh = hts_open(vcf_path, "r")) == NULL)
      die("Cannot re-open VCF file: %s", vcf_path);
    if((vcfhdr = bcf_hdr_read(vcffh)) == NULL)
      die("Cannot re-read VCF header: %s", vcf_path);

    prefs.load_kmers_only = false;
  }

  //
  // Load graphs
  //
  GraphLoadingStats gstats;
  memset(&gstats, 0, sizeof(gstats));

  GraphLoadingPrefs gprefs = graph_loading_prefs(&db_graph);
  gprefs.must_exist_in_graph = low_mem;

  for(i = 0; i < num_gfiles; i++) {
    graph_load(&gfiles[i], gprefs, &gstats);
    graph_file_close(&gfiles[i]);
  }
  ctx_free(gfiles);

  hash_table_print_stats(&db_graph.ht);

  //
  // Set up VCF header / graph matchup
  //
  size_t *samplehdrids = ctx_malloc(db_graph.num_of_cols * sizeof(size_t));

  // Add samples to vcf header
  bcf_hdr_t *outhdr = bcf_hdr_dup(vcfhdr);
  bcf_hrec_t *hrec;
  int sid;
  char hdrstr[200];

  for(i = 0; i < db_graph.num_of_cols; i++) {
    char *sname = db_graph.ginfo[i].sample_name.b;
    if((sid = bcf_hdr_id2int(outhdr, BCF_DT_SAMPLE, sname)) < 0) {
      bcf_hdr_add_sample(outhdr, sname);
      sid = bcf_hdr_id2int(outhdr, BCF_DT_SAMPLE, sname);
    }
    samplehdrids[i] = sid;

    // Add SAMPLE field
    hrec = bcf_hdr_get_hrec(outhdr, BCF_HL_STR, "ID", sname, "SAMPLE");

    if(hrec == NULL) {
      sprintf(hdrstr, "##SAMPLE=<ID=%s,%s=%"PRIu64",%s=%"PRIu64",%s=%zu>", sname,
              sample_kcov_tag,
              gstats.nkmers[i] ? gstats.sumcov[i] / gstats.nkmers[i] : 0,
              sample_nk_tag, gstats.nkmers[i],
              sample_rlk_tag, (size_t)db_graph.ginfo[i].mean_read_length);
      bcf_hdr_append(outhdr, hdrstr);
    }
    else {
      // mean kcovg
      sprintf(hdrstr, "%"PRIu64, gstats.sumcov[i] / gstats.nkmers[i]);
      vcf_misc_add_update_hrec(hrec, sample_kcov_tag, hdrstr);
      // num kmers
      sprintf(hdrstr, "%"PRIu64, gstats.nkmers[i]);
      vcf_misc_add_update_hrec(hrec, sample_nk_tag, hdrstr);
      // mean read length in kmers
      sprintf(hdrstr, "%zu", (size_t)db_graph.ginfo[i].mean_read_length);
      vcf_misc_add_update_hrec(hrec, sample_rlk_tag, hdrstr);
    }

    status("[vcfcov] Colour %zu: %s [VCF column %zu]", i, sname, samplehdrids[i]);
  }

  // Add genotype format fields
  // One field per alternative allele

  sprintf(hdrstr, "##FORMAT=<ID=%s,Number=A,Type=Integer,"
          "Description=\"Coverage on ref (k=%zu): sum(kmer_covs) / exp_num_kmers\">\n",
          kcov_ref_tag, db_graph.kmer_size);
  bcf_hdr_append(outhdr, hdrstr);
  sprintf(hdrstr, "##FORMAT=<ID=%s,Number=A,Type=Integer,"
          "Description=\"Coverage on alt (k=%zu): sum(kmer_covs) / exp_num_kmers\">\n",
          kcov_alt_tag, db_graph.kmer_size);
  bcf_hdr_append(outhdr, hdrstr);

  bcf_hdr_set_version(outhdr, "VCFv4.2");

  // Add command string to header
  vcf_misc_hdr_add_cmd(outhdr, cmd_get_cmdline(), cmd_get_cwd());

  if(bcf_hdr_write(outfh, outhdr) != 0)
    die("Cannot write header to: %s", futil_outpath_str(out_path));

  status("[vcfcov] Reading %s and adding coverage", vcf_path);

  // Reset stats and get coverage
  memset(&st, 0, sizeof(st));

  vcfcov_file(vcffh, vcfhdr, outfh, outhdr, vcf_path, fai,
              samplehdrids, &prefs, &st, &db_graph);

  // Print statistics
  char ns0[50], ns1[50];
  status("[vcfcov] Read %s VCF lines", ulong_to_str(st.nvcf_lines, ns0));
  status("[vcfcov] Read %s ALTs", ulong_to_str(st.nalts_read, ns0));
  status("[vcfcov] Used %s kmers", ulong_to_str(st.ngt_kmers, ns0));
  status("[vcfcov] ALTs used: %s / %s (%.2f%%)",
         ulong_to_str(st.nalts_loaded, ns0), ulong_to_str(st.nalts_read, ns1),
         st.nalts_read ? (100.0*st.nalts_loaded) / st.nalts_read : 0.0);
  status("[vcfcov] ALTs too long (>%ubp): %s / %s (%.2f%%)", max_allele_len,
         ulong_to_str(st.nalts_too_long, ns0), ulong_to_str(st.nalts_read, ns1),
         st.nalts_read ? (100.0*st.nalts_too_long) / st.nalts_read : 0.0);
  status("[vcfcov] ALTs too dense (>%u within %zubp): %s / %s (%.2f%%)",
         max_gt_vars, db_graph.kmer_size,
         ulong_to_str(st.nalts_no_covg, ns0), ulong_to_str(st.nalts_read, ns1),
         st.nalts_read ? (100.0*st.nalts_no_covg) / st.nalts_read : 0.0);
  status("[vcfcov] ALTs printed with coverage: %s / %s (%.2f%%)",
         ulong_to_str(st.nalts_with_covg, ns0), ulong_to_str(st.nalts_read, ns1),
         st.nalts_read ? (100.0*st.nalts_with_covg) / st.nalts_read : 0.0);

  status("[vcfcov] Saved to: %s\n", out_path);

  ctx_free(samplehdrids);
  graph_loading_stats_destroy(&gstats);

  bcf_hdr_destroy(vcfhdr);
  bcf_hdr_destroy(outhdr);
  hts_close(vcffh);
  hts_close(outfh);
  fai_destroy(fai);
  db_graph_dealloc(&db_graph);

  return EXIT_SUCCESS;
}
Esempio n. 12
0
int main(int argc , char *argv[])
{
	
	FILE *fin , *fout ;

	vcf_info    vcf ;

	faidx_t   *fai ;
	
	char	buffer[32];
	
	char *s = malloc(1024*sizeof(char));

	if( argc != 4 ){
		fprintf(stderr,"sv2vcf  [ref.fa]  [input] [output] \n");
		return 1 ;
	}

	fai =  fai_load(argv[1]);
	
	if(!fai){
		fprintf(stderr,"can't open the file:%s.fai\n",argv[1]);
		return  1;
	}


	fin =  fopen(argv[2],"r");

	if(!fin){
		fprintf(stderr,"can't open the file:%s\n", argv[2]);
		return 1 ;
	}

	fout = fopen(argv[3],"w");
	if(!fout){
		fprintf(stderr,"can't open the file:%s\n", argv[3]);
		return 1 ;
	}
	fprintf(fout,"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILT\tINFO\n");
	while(!feof(fin)){
		char *tmp ;
		int  len ;
		fscanf(fin,"%s\t",buffer);
		if(!strcmp(buffer,"TRSdL")){
			fscanf(fin,"%s\t",buffer);
			fscanf(fin,"%d\t%d\n",&vcf.pos[0],&vcf.pos[1]);
			int	k  ;
			for(  k = 0 ;  k < 2 ; k++){
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[k],vcf.pos[k]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[k] = *tmp ;
				free(tmp);
			}
			fprintf(fout,"%s\t%d\tbnd_V\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],buffer,vcf.pos[1],vcf.mut[0]);
			fprintf(fout,"%s\t%d\tbnd_W\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],buffer,vcf.pos[0],vcf.mut[1]);

		}else if(!strcmp(buffer,"TRSdR")){
			fscanf(fin,"%s\t",buffer);
			fscanf(fin,"%d\t%d\n",&vcf.pos[0],&vcf.pos[1]);
			int	k  ;
			for(  k = 0 ;  k < 2 ; k++){
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[k],vcf.pos[k]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[k] = *tmp ;
				free(tmp);
			}
			fprintf(fout,"%s\t%d\tbnd_V\t%c\t%s:%d]%c]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],buffer,vcf.pos[1],vcf.mut[0]);
			fprintf(fout,"%s\t%d\tbnd_W\t%c\t%s:%d]%c]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],buffer,vcf.pos[0],vcf.mut[1]);

		}else if((tmp = strstr(buffer,"TandemCNV"))){
			tmp = tmp + 9 ;
			vcf.type = atoi(tmp);
			fscanf(fin,"%s\t",buffer);
			if(vcf.type == 1 ) {
				fscanf(fin,"%*[^\n]\n");
				continue ;
			}
			fscanf(fin,"%d\t%d\n",&vcf.pos[0],&vcf.pos[1]);
			int	k  ;
			for(  k = 0 ;  k < 2 ; k++){
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[k],vcf.pos[k]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[k] = *tmp ;
				free(tmp);
			}
			if(vcf.type == 2){
				fprintf(fout,"%s\t%d\tbnd_V\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],vcf.mut[1],buffer,vcf.pos[1]);
				fprintf(fout,"%s\t%d\tbnd_W\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],vcf.mut[1],buffer,vcf.pos[1]);
			}else {
				fprintf(fout,"%s\t%d\tbnd_V\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],buffer,vcf.pos[0],vcf.mut[0]);
				fprintf(fout,"%s\t%d\tbnd_W\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],buffer,vcf.pos[0],vcf.mut[0]);
			}


		}else if((tmp = strstr( buffer,"CNV"))){
			
			tmp = tmp + 3;
			vcf.type = atoi(tmp);
			fscanf(fin,"%s\t",buffer);
			fscanf(fin,"%d\t%d\t%d\n",&vcf.pos[0],&vcf.pos[1],&vcf.pos[2]);
			if( vcf.type == 1 ){

				sprintf(s,"%s:%d-%d",buffer,vcf.pos[0],vcf.pos[0]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[0] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[2],vcf.pos[2]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[2] = *tmp ;
				free(tmp);

				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[1],vcf.pos[1]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[1] = *tmp ;
				free(tmp);
				
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[2]+1,vcf.pos[2]+1);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[3] = *tmp ;
				free(tmp);

				fprintf(fout,"%s\t%d\tbnd_V\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],buffer,vcf.pos[2],vcf.mut[0]);
				fprintf(fout,"%s\t%d\tbnd_W\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[2],vcf.mut[2],vcf.mut[2],buffer,vcf.pos[0]);
				fprintf(fout,"%s\t%d\tbnd_U\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_X;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],vcf.mut[1],buffer,vcf.pos[2]+1);
				fprintf(fout,"%s\t%d\tbnd_X\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_U;EVENT=PRO\n",buffer,vcf.pos[2]+1,vcf.mut[3],buffer,vcf.pos[1],vcf.mut[3]);
			}else if( vcf.type == 2 ){
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[0],vcf.pos[0]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[0] = *tmp ;
				free(tmp);

				sprintf(s,"%s:%d-%d",buffer,vcf.pos[2]+1,vcf.pos[2]+1);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[3] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[1],vcf.pos[1]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[1] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[2],vcf.pos[2]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[2] = *tmp ;
				free(tmp);

				fprintf(fout,"%s\t%d\tbnd_V\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],buffer,vcf.pos[2]+1,vcf.mut[0]);
				fprintf(fout,"%s\t%d\tbnd_W\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_U;EVENT=PRO\n",buffer,vcf.pos[2]+1,vcf.mut[3],buffer,vcf.pos[0],vcf.mut[3]);
				fprintf(fout,"%s\t%d\tbnd_U\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_X;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],vcf.mut[1],buffer,vcf.pos[2]);
				fprintf(fout,"%s\t%d\tbnd_X\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_U;EVENT=PRO\n",buffer,vcf.pos[2],vcf.mut[2],vcf.mut[2],buffer,vcf.pos[1]);


			}else if(vcf.type == 3){
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[0],vcf.pos[0]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[0] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[1],vcf.pos[1]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[1] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[0]+1,vcf.pos[0]+1);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[2] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[2],vcf.pos[2]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[3] = *tmp ;
				free(tmp);

				fprintf(fout,"%s\t%d\tbnd_V\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],vcf.mut[0],buffer,vcf.pos[1]);
				fprintf(fout,"%s\t%d\tbnd_W\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],buffer,vcf.pos[0],vcf.mut[1]);
				fprintf(fout,"%s\t%d\tbnd_U\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_X;EVENT=PRO\n",buffer,vcf.pos[0]+1,vcf.mut[2],buffer,vcf.pos[2],vcf.mut[2]);
				fprintf(fout,"%s\t%d\tbnd_X\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[2],vcf.mut[3],vcf.mut[3],buffer,vcf.pos[0]+1);

			}else if(vcf.type == 4){
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[0],vcf.pos[0]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[0] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[1],vcf.pos[1]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[1] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[0]+1,vcf.pos[0]+1);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[2] = *tmp ;
				free(tmp);
				
				sprintf(s,"%s:%d-%d",buffer,vcf.pos[2],vcf.pos[2]);
				tmp = fai_fetch(fai,s,&len);
				vcf.mut[3] = *tmp ;
				free(tmp);

				fprintf(fout,"%s\t%d\tbnd_V\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],vcf.mut[0],buffer,vcf.pos[2]);
				fprintf(fout,"%s\t%d\tbnd_W\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[2],vcf.mut[3],vcf.mut[3],buffer,vcf.pos[0]);
				fprintf(fout,"%s\t%d\tbnd_U\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_X;EVENT=PRO\n",buffer,vcf.pos[0]+1,vcf.mut[2],buffer,vcf.pos[1],vcf.mut[2]);
				fprintf(fout,"%s\t%d\tbnd_X\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_U;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[3],buffer,vcf.pos[0]+1,vcf.mut[3]);
			}
		}else if((tmp =strstr(buffer,"TRS"))){
			
			tmp = tmp + 3;
			vcf.type = atoi(tmp);
			fscanf(fin,"%s\t",buffer);
			if( vcf.type == 2 || vcf.type == 3 ){
				fscanf(fin,"%d\t%d\t%d\t%d\n",&vcf.pos[1],&vcf.pos[2],&vcf.pos[5],&vcf.pos[6]);

				vcf.pos[0] = vcf.pos[1] - 1 ;
				vcf.pos[3] = vcf.pos[2] + 1 ;
				vcf.pos[4] = vcf.pos[5] - 1 ;
				vcf.pos[7] = vcf.pos[6] + 1 ;

				int  k  = 0 ;
				for( k = 0 ;  k < 8 ; k++){
					sprintf(s,"%s:%d-%d",buffer,vcf.pos[k],vcf.pos[k]);
					tmp = fai_fetch(fai,s,&len);
					vcf.mut[k] = *tmp ;
					free(tmp);
				}
				if(vcf.type == 2 ){
					fprintf(fout,"%s\t%d\tbnd_V\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],vcf.mut[0],buffer,vcf.pos[6]);
					fprintf(fout,"%s\t%d\tbnd_W\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[6],vcf.mut[6],vcf.mut[6],buffer,vcf.pos[0]);
					fprintf(fout,"%s\t%d\tbnd_U\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_X;EVENT=PRO\n",buffer,vcf.pos[3],vcf.mut[3],buffer,vcf.pos[5],vcf.mut[3]);
					fprintf(fout,"%s\t%d\tbnd_X\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_U;EVENT=PRO\n",buffer,vcf.pos[5],vcf.mut[5],buffer,vcf.pos[3],vcf.mut[5]);
					
					fprintf(fout,"%s\t%d\tbnd_E\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_D;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],buffer,vcf.pos[4],vcf.mut[1]);
					fprintf(fout,"%s\t%d\tbnd_D\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_E;EVENT=PRO\n",buffer,vcf.pos[4],vcf.mut[4],vcf.mut[4],buffer,vcf.pos[1]);
					fprintf(fout,"%s\t%d\tbnd_Z\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_H;EVENT=PRO\n",buffer,vcf.pos[2],vcf.mut[2],vcf.mut[2],buffer,vcf.pos[7]);
					fprintf(fout,"%s\t%d\tbnd_H\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_Z;EVENT=PRO\n",buffer,vcf.pos[7],vcf.mut[7],buffer,vcf.pos[2],vcf.mut[7]);
					

				}else {
					fprintf(fout,"%s\t%d\tbnd_V\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],vcf.mut[0],buffer,vcf.pos[5]);
					fprintf(fout,"%s\t%d\tbnd_W\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_V;EVENT=PRO\n",buffer,vcf.pos[5],vcf.mut[5],buffer,vcf.pos[0],vcf.mut[5]);
					fprintf(fout,"%s\t%d\tbnd_U\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_X;EVENT=PRO\n",buffer,vcf.pos[3],vcf.mut[3],buffer,vcf.pos[6],vcf.mut[3]);
					fprintf(fout,"%s\t%d\tbnd_X\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_U;EVENT=PRO\n",buffer,vcf.pos[6],vcf.mut[6],vcf.mut[6],buffer,vcf.pos[3]);
					
					fprintf(fout,"%s\t%d\tbnd_E\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_D;EVENT=PRO\n",buffer,vcf.pos[2],vcf.mut[2],vcf.mut[2],buffer,vcf.pos[4]);
					fprintf(fout,"%s\t%d\tbnd_D\t%c\t%c]%s:%d]\t6\tPASS\tSVTYPE=BND;MATEID=bnd_E;EVENT=PRO\n",buffer,vcf.pos[4],vcf.mut[4],vcf.mut[4],buffer,vcf.pos[2]);
					fprintf(fout,"%s\t%d\tbnd_Z\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_H;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],buffer,vcf.pos[7],vcf.mut[1]);
					fprintf(fout,"%s\t%d\tbnd_H\t%c\t[%s:%d[%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_Z;EVENT=PRO\n",buffer,vcf.pos[7],vcf.mut[7],buffer,vcf.pos[1],vcf.mut[7]);

				}
			}else if(vcf.type == 1){
				fscanf(fin,"%d\t%d\n",&vcf.pos[1],&vcf.pos[2]);
				vcf.pos[0] = vcf.pos[1] - 1 ;
				vcf.pos[3] = vcf.pos[2] + 1 ;
				int	k ;
				for( k = 0 ;  k < 4 ; k++){
					sprintf(s,"%s:%d-%d",buffer,vcf.pos[k],vcf.pos[k]);
					tmp = fai_fetch(fai,s,&len);
					vcf.mut[k] = *tmp ;
					free(tmp);
				}
				fprintf(fout,"%s\t%d\tbnd_V\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[0],vcf.mut[0],vcf.mut[0],buffer,vcf.pos[2]);
				fprintf(fout,"%s\t%d\tbnd_W\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[2],vcf.mut[2],buffer,vcf.pos[0],vcf.mut[2]);
				fprintf(fout,"%s\t%d\tbnd_U\t%c\t]%s:%d]%c\t6\tPASS\tSVTYPE=BND;MATEID=bnd_X;EVENT=PRO\n",buffer,vcf.pos[1],vcf.mut[1],buffer,vcf.pos[3],vcf.mut[1]);
				fprintf(fout,"%s\t%d\tbnd_X\t%c\t%c[%s:%d[\t6\tPASS\tSVTYPE=BND;MATEID=bnd_W;EVENT=PRO\n",buffer,vcf.pos[3],vcf.mut[3],vcf.mut[3],buffer,vcf.pos[1]);


			}else{
				fscanf(fin,"%*[^\n]\n");
			}
		}else{
				fscanf(fin,"%*[^\n]\n");
		}
	}
	
	free(s);
	fclose(fin);
	fclose(fout);
	return 0;
}
Esempio n. 13
0
int bam_mpileup(int argc, char *argv[])
{
    int c;
    const char *file_list = NULL;
    char **fn = NULL;
    int nfiles = 0, use_orphan = 0, noref = 0;
    mplp_conf_t mplp;
    memset(&mplp, 0, sizeof(mplp_conf_t));
    mplp.min_baseQ = 13;
    mplp.capQ_thres = 0;
    mplp.max_depth = 250; mplp.max_indel_depth = 250;
    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
    mplp.min_frac = 0.002; mplp.min_support = 1;
    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
    mplp.argc = argc; mplp.argv = argv;
    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
    mplp.output_fname = NULL;
    mplp.output_type = FT_VCF;
    mplp.record_cmd_line = 1;
    mplp.n_threads = 0;
    mplp.bsmpl = bam_smpl_init();

    static const struct option lopts[] =
    {
        {"rf", required_argument, NULL, 1},   // require flag
        {"ff", required_argument, NULL, 2},   // filter flag
        {"incl-flags", required_argument, NULL, 1},
        {"excl-flags", required_argument, NULL, 2},
        {"output", required_argument, NULL, 3},
        {"open-prob", required_argument, NULL, 4},
        {"ignore-RG", no_argument, NULL, 5},
        {"ignore-rg", no_argument, NULL, 5},
        {"gvcf", required_argument, NULL, 'g'},
        {"no-reference", no_argument, NULL, 7},
        {"no-version", no_argument, NULL, 8},
        {"threads",required_argument,NULL,9},
        {"illumina1.3+", no_argument, NULL, '6'},
        {"count-orphans", no_argument, NULL, 'A'},
        {"bam-list", required_argument, NULL, 'b'},
        {"no-BAQ", no_argument, NULL, 'B'},
        {"no-baq", no_argument, NULL, 'B'},
        {"adjust-MQ", required_argument, NULL, 'C'},
        {"adjust-mq", required_argument, NULL, 'C'},
        {"max-depth", required_argument, NULL, 'd'},
        {"redo-BAQ", no_argument, NULL, 'E'},
        {"redo-baq", no_argument, NULL, 'E'},
        {"fasta-ref", required_argument, NULL, 'f'},
        {"read-groups", required_argument, NULL, 'G'},
        {"region", required_argument, NULL, 'r'},
        {"regions", required_argument, NULL, 'r'},
        {"regions-file", required_argument, NULL, 'R'},
        {"targets", required_argument, NULL, 't'},
        {"targets-file", required_argument, NULL, 'T'},
        {"min-MQ", required_argument, NULL, 'q'},
        {"min-mq", required_argument, NULL, 'q'},
        {"min-BQ", required_argument, NULL, 'Q'},
        {"min-bq", required_argument, NULL, 'Q'},
        {"ignore-overlaps", no_argument, NULL, 'x'},
        {"output-type", required_argument, NULL, 'O'},
        {"samples", required_argument, NULL, 's'},
        {"samples-file", required_argument, NULL, 'S'},
        {"annotate", required_argument, NULL, 'a'},
        {"ext-prob", required_argument, NULL, 'e'},
        {"gap-frac", required_argument, NULL, 'F'},
        {"tandem-qual", required_argument, NULL, 'h'},
        {"skip-indels", no_argument, NULL, 'I'},
        {"max-idepth", required_argument, NULL, 'L'},
        {"min-ireads ", required_argument, NULL, 'm'},
        {"per-sample-mF", no_argument, NULL, 'p'},
        {"per-sample-mf", no_argument, NULL, 'p'},
        {"platforms", required_argument, NULL, 'P'},
        {NULL, 0, NULL, 0}
    };
    while ((c = getopt_long(argc, argv, "Ag:f:r:R:q:Q:C:Bd:L:b:P:po:e:h:Im:F:EG:6O:xa:s:S:t:T:",lopts,NULL)) >= 0) {
        switch (c) {
        case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
        case  1 :
            mplp.rflag_require = bam_str2flag(optarg);
            if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; }
            break;
        case  2 :
            mplp.rflag_filter = bam_str2flag(optarg);
            if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; }
            break;
        case  3 : mplp.output_fname = optarg; break;
        case  4 : mplp.openQ = atoi(optarg); break;
        case  5 : bam_smpl_ignore_readgroups(mplp.bsmpl); break;
        case 'g':
            mplp.gvcf = gvcf_init(optarg);
            if ( !mplp.gvcf ) error("Could not parse: --gvcf %s\n", optarg);
            break;
        case 'f':
            mplp.fai = fai_load(optarg);
            if (mplp.fai == NULL) return 1;
            mplp.fai_fname = optarg;
            break;
        case  7 : noref = 1; break;
        case  8 : mplp.record_cmd_line = 0; break;
        case  9 : mplp.n_threads = strtol(optarg, 0, 0); break;
        case 'd': mplp.max_depth = atoi(optarg); break;
        case 'r': mplp.reg_fname = strdup(optarg); break;
        case 'R': mplp.reg_fname = strdup(optarg); mplp.reg_is_file = 1; break;
        case 't':
                  // In the original version the whole BAM was streamed which is inefficient
                  //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
                  //  best strategy, that is streaming or jumping.
                  if ( optarg[0]=='^' ) optarg++;
                  else mplp.bed_logic = 1;
                  mplp.bed = regidx_init(NULL,regidx_parse_reg,NULL,0,NULL);
                  mplp.bed_itr = regitr_init(mplp.bed);
                  if ( regidx_insert_list(mplp.bed,optarg,',') !=0 )
                  {
                      fprintf(stderr,"Could not parse the targets: %s\n", optarg);
                      exit(EXIT_FAILURE);
                  }
                  break;
        case 'T':
                  if ( optarg[0]=='^' ) optarg++;
                  else mplp.bed_logic = 1;
                  mplp.bed = regidx_init(optarg,NULL,NULL,0,NULL);
                  if (!mplp.bed) { fprintf(stderr, "bcftools mpileup: Could not read file \"%s\"", optarg); return 1; }
                  break;
        case 'P': mplp.pl_list = strdup(optarg); break;
        case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
        case 'B': mplp.flag &= ~MPLP_REALN; break;
        case 'I': mplp.flag |= MPLP_NO_INDEL; break;
        case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
        case '6': mplp.flag |= MPLP_ILLUMINA13; break;
        case 's': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,0)<0 ) error("Could not read samples: %s\n",optarg); break;
        case 'S': if ( bam_smpl_add_samples(mplp.bsmpl,optarg,1)<0 ) error("Could not read samples: %s\n",optarg); break;
        case 'O': 
            switch (optarg[0]) {
                case 'b': mplp.output_type = FT_BCF_GZ; break;
                case 'u': mplp.output_type = FT_BCF; break;
                case 'z': mplp.output_type = FT_VCF_GZ; break;
                case 'v': mplp.output_type = FT_VCF; break;
                default: error("[error] The option \"-O\" changed meaning when mpileup moved to bcftools. Did you mean: \"bcftools mpileup --output-type\" or \"samtools mpileup --output-BP\"?\n"); 
            }
            break;
        case 'C': mplp.capQ_thres = atoi(optarg); break;
        case 'q': mplp.min_mq = atoi(optarg); break;
        case 'Q': mplp.min_baseQ = atoi(optarg); break;
        case 'b': file_list = optarg; break;
        case 'o': {
                char *end;
                long value = strtol(optarg, &end, 10);
                // Distinguish between -o INT and -o FILE (a bit of a hack!)
                if (*end == '\0') mplp.openQ = value;
                else mplp.output_fname = optarg;
            }
            break;
        case 'e': mplp.extQ = atoi(optarg); break;
        case 'h': mplp.tandemQ = atoi(optarg); break;
        case 'A': use_orphan = 1; break;
        case 'F': mplp.min_frac = atof(optarg); break;
        case 'm': mplp.min_support = atoi(optarg); break;
        case 'L': mplp.max_indel_depth = atoi(optarg); break;
        case 'G': bam_smpl_add_readgroups(mplp.bsmpl, optarg, 1); break;
        case 'a':
            if (optarg[0]=='?') {
                list_annotations(stderr);
                return 1;
            }
            mplp.fmt_flag |= parse_format_flag(optarg);
        break;
        default:
            fprintf(stderr,"Invalid option: '%c'\n", c);
            return 1;
        }
    }

    if ( mplp.gvcf && !(mplp.fmt_flag&B2B_FMT_DP) )
    {
        fprintf(stderr,"[warning] The -t DP option is required with --gvcf, switching on.\n");
        mplp.fmt_flag |= B2B_FMT_DP;
    }
    if ( mplp.flag&(MPLP_BCF|MPLP_VCF|MPLP_NO_COMP) )
    {
        if ( mplp.flag&MPLP_VCF )
        {
            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_VCF;
            else mplp.output_type = FT_VCF_GZ;
        }
        else if ( mplp.flag&MPLP_BCF )
        {
            if ( mplp.flag&MPLP_NO_COMP ) mplp.output_type = FT_BCF;
            else mplp.output_type = FT_BCF_GZ;
        }
    }
    if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
    {
        fprintf(stderr,"Error: The -B option cannot be combined with -E\n");
        return 1;
    }
    if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
    if (argc == 1)
    {
        print_usage(stderr, &mplp);
        return 1;
    }
    if (!mplp.fai && !noref) {
        fprintf(stderr,"Error: mpileup requires the --fasta-ref option by default; use --no-reference to run without a fasta reference\n");
        return 1;
    }
    int ret,i;
    if (file_list) 
    {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        mplp.files  = fn;
        mplp.nfiles = nfiles;
    }
    else
    {
        mplp.nfiles = argc - optind;
        mplp.files  = (char**) malloc(mplp.nfiles*sizeof(char*));
        for (i=0; i<mplp.nfiles; i++) mplp.files[i] = strdup(argv[optind+i]);
    }
    ret = mpileup(&mplp);

    for (i=0; i<mplp.nfiles; i++) free(mplp.files[i]);
    free(mplp.files);
    free(mplp.reg_fname); free(mplp.pl_list);
    if (mplp.fai) fai_destroy(mplp.fai);
    if (mplp.bed) regidx_destroy(mplp.bed);
    if (mplp.bed_itr) regitr_destroy(mplp.bed_itr);
    if (mplp.reg) regidx_destroy(mplp.reg);
    bam_smpl_destroy(mplp.bsmpl);
    return ret;
}
Esempio n. 14
0
int bam_fillmd(int argc, char *argv[])
{
    int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
    samFile *fp = NULL, *fpout = NULL;
    bam_hdr_t *header = NULL;
    faidx_t *fai = NULL;
    char *ref = NULL, mode_w[8], *ref_file;
    bam1_t *b = NULL;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
        { NULL, 0, NULL, 0 }
    };

    flt_flag = UPDATE_NM | UPDATE_MD;
    is_bam_out = is_uncompressed = is_realn = max_nm = capQ = baq_flag = 0;
    strcpy(mode_w, "w");
    while ((c = getopt_long(argc, argv, "EqreuNhbSC:n:Ad", lopts, NULL)) >= 0) {
        switch (c) {
        case 'r':
            is_realn = 1;
            break;
        case 'e':
            flt_flag |= USE_EQUAL;
            break;
        case 'd':
            flt_flag |= DROP_TAG;
            break;
        case 'q':
            flt_flag |= BIN_QUAL;
            break;
        case 'h':
            flt_flag |= HASH_QNM;
            break;
        case 'N':
            flt_flag &= ~(UPDATE_MD|UPDATE_NM);
            break;
        case 'b':
            is_bam_out = 1;
            break;
        case 'u':
            is_uncompressed = is_bam_out = 1;
            break;
        case 'S':
            break;
        case 'n':
            max_nm = atoi(optarg);
            break;
        case 'C':
            capQ = atoi(optarg);
            break;
        case 'A':
            baq_flag |= 1;
            break;
        case 'E':
            baq_flag |= 2;
            break;
        default:
            if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
            fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
        /* else fall-through */
        case '?':
            return calmd_usage();
        }
    }
    if (is_bam_out) strcat(mode_w, "b");
    else strcat(mode_w, "h");
    if (is_uncompressed) strcat(mode_w, "0");
    if (optind + (ga.reference == NULL) >= argc)
        return calmd_usage();
    fp = sam_open_format(argv[optind], "r", &ga.in);
    if (fp == NULL) {
        print_error_errno("calmd", "Failed to open input file '%s'", argv[optind]);
        return 1;
    }

    header = sam_hdr_read(fp);
    if (header == NULL || header->n_targets == 0) {
        fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
        goto fail;
    }

    fpout = sam_open_format("-", mode_w, &ga.out);
    if (fpout == NULL) {
        print_error_errno("calmd", "Failed to open output");
        goto fail;
    }
    if (sam_hdr_write(fpout, header) < 0) {
        print_error_errno("calmd", "Failed to write sam header");
        goto fail;
    }

    ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
    fai = fai_load(ref_file);

    if (!fai) {
        print_error_errno("calmd", "Failed to open reference file '%s'", ref_file);
        goto fail;
    }

    b = bam_init1();
    if (!b) {
        fprintf(stderr, "[bam_fillmd] Failed to allocate bam struct\n");
        goto fail;
    }
    while ((ret = sam_read1(fp, header, b)) >= 0) {
        if (b->core.tid >= 0) {
            if (tid != b->core.tid) {
                free(ref);
                ref = fai_fetch(fai, header->target_name[b->core.tid], &len);
                tid = b->core.tid;
                if (ref == 0) { // FIXME: Should this always be fatal?
                    fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
                            header->target_name[tid]);
                    if (is_realn || capQ > 10) goto fail; // Would otherwise crash
                }
            }
            if (is_realn) sam_prob_realn(b, ref, len, baq_flag);
            if (capQ > 10) {
                int q = sam_cap_mapq(b, ref, len, capQ);
                if (b->core.qual > q) b->core.qual = q;
            }
            if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
        }
        if (sam_write1(fpout, header, b) < 0) {
            print_error_errno("calmd", "failed to write to output file");
            goto fail;
        }
    }
    if (ret < -1) {
        fprintf(stderr, "[bam_fillmd] Error reading input.\n");
        goto fail;
    }
    bam_destroy1(b);
    bam_hdr_destroy(header);

    free(ref);
    fai_destroy(fai);
    sam_close(fp);
    if (sam_close(fpout) < 0) {
        fprintf(stderr, "[bam_fillmd] error when closing output file\n");
        return 1;
    }
    return 0;

fail:
    free(ref);
    if (b) bam_destroy1(b);
    if (header) bam_hdr_destroy(header);
    if (fai) fai_destroy(fai);
    if (fp) sam_close(fp);
    if (fpout) sam_close(fpout);
    return 1;
}
Esempio n. 15
0
GCBias::GCBias(const char* ref_filename, PosTable& foreground_position_table,
	           pos_t median_frag_len,
               sequencing_bias* seqbias[2],
               const char* task_name)
{
	faidx_t* ref_file = fai_load(ref_filename);
	if (!ref_file) {
        Logger::abort("Can't open fasta file '%s'.", ref_filename);
	}

	std::vector<ReadPos> foreground_positions;
    const size_t max_dump = 10000000;
	foreground_position_table.dump(foreground_positions, max_dump);
	std::sort(foreground_positions.begin(), foreground_positions.end(), ReadPosSeqnameCmp());

	Logger::push_task(task_name, foreground_positions.size());
	LoggerTask& task = Logger::get_task(task_name);

    typedef std::pair<float, float> WeightedGC;

	std::vector<WeightedGC> foreground_gc, background_gc;

    int  seqlen = 0;
    SeqName curr_seqname;
    char* seq = NULL;
    twobitseq tbseq;
    twobitseq tbseqrc;
    rng_t rng;

    pos_t L = seqbias[0] ? seqbias[0]->getL() : 0;

    std::vector<ReadPos>::iterator i;
    for (i = foreground_positions.begin(); i != foreground_positions.end(); ++i) {
        if (i->seqname != curr_seqname) {
            free(seq);
            seq = faidx_fetch_seq(ref_file, i->seqname.get().c_str(), 0, INT_MAX, &seqlen);
            Logger::debug("read sequence %s.", i->seqname.get().c_str());

            if (seq == NULL) {
                Logger::warn("warning: reference sequence not found, skipping.");
            }
            else {
                for (char* c = seq; *c; c++) *c = tolower(*c);
                tbseq = seq;
                tbseqrc = tbseq;
                tbseqrc.revcomp();

            }

            curr_seqname = i->seqname;
        }

        if (seq == NULL || (pos_t) tbseq.size() < median_frag_len) continue;

        // fragments with many copies tend to have too much weight when training
        // leading to somewhat less than stable results.
        if (i->count > 4) continue;

        // sample background position
        boost::random::uniform_int_distribution<pos_t> random_uniform(
                i->start + L, i->end - median_frag_len);
        pos_t pos = random_uniform(rng);
        float gc = (float) gc_count(seq + pos, median_frag_len) / median_frag_len;
        float sb = seqbias[0] ?
                   seqbias[0]->get_bias(tbseq, pos - L) *
                   seqbias[1]->get_bias(tbseqrc, seqlen - pos - 1 - L) : 1.0;
        background_gc.push_back(WeightedGC(gc, 1.0 / sb));

        // sample foreground position
        if (i->strand == 0) {
            if (i->pos >= i->start && i->pos + median_frag_len - 1 <= i->end) {
                float sb = seqbias[0] ?
                           seqbias[0]->get_bias(tbseq, i->pos - L) *
                           seqbias[1]->get_bias(tbseqrc, seqlen - i->pos - 1 - L) : 1.0;

                foreground_gc.push_back(
                    WeightedGC((float) gc_count(seq + i->pos, median_frag_len) / median_frag_len,
                               1.0 / sb));
            }
        } else {
            if (i->pos - median_frag_len >= i->start && i->pos <= i->end) {
                float sb = seqbias[0] ?
                           seqbias[0]->get_bias(tbseq, i->pos - median_frag_len - L) *
                           seqbias[1]->get_bias(tbseqrc, seqlen - i->pos - median_frag_len - 1 - L) : 1.0;
                foreground_gc.push_back(
                    WeightedGC((float) gc_count(seq + i->pos - median_frag_len, median_frag_len) / median_frag_len,
                               1.0 /sb));
            }
        }
        task.inc();
    }

    free(seq);
    fai_destroy(ref_file);

#if 0
    FILE* out = fopen("gcbias.tsv", "w");
    fprintf(out, "group\tgc\tweight\n");
    BOOST_FOREACH (WeightedGC& value, foreground_gc) {
        fprintf(out, "foreground\t%f\t%f\n", (double) value.first, (double) value.second);
    }
int scorereads_main(int argc, char** argv)
{
    parse_scorereads_options(argc, argv);
    omp_set_num_threads(opt::num_threads);

    Fast5Map name_map(opt::reads_file);
    ModelMap models;
    if (!opt::models_fofn.empty())
        models = read_models_fofn(opt::models_fofn);
    
    // Open the BAM and iterate over reads

    // load bam file
    htsFile* bam_fh = sam_open(opt::bam_file.c_str(), "r");
    assert(bam_fh != NULL);

    // load bam index file
    std::string index_filename = opt::bam_file + ".bai";
    hts_idx_t* bam_idx = bam_index_load(index_filename.c_str());
    assert(bam_idx != NULL);

    // read the bam header
    bam_hdr_t* hdr = sam_hdr_read(bam_fh);
    
    // load reference fai file
    faidx_t *fai = fai_load(opt::genome_file.c_str());

    hts_itr_t* itr;

    // If processing a region of the genome, only emit events aligned to this window
    int clip_start = -1;
    int clip_end = -1;

    if(opt::region.empty()) {
        // TODO: is this valid?
        itr = sam_itr_queryi(bam_idx, HTS_IDX_START, 0, 0);
    } else {

        fprintf(stderr, "Region: %s\n", opt::region.c_str());
        itr = sam_itr_querys(bam_idx, hdr, opt::region.c_str());
        hts_parse_reg(opt::region.c_str(), &clip_start, &clip_end);
    }

#ifndef H5_HAVE_THREADSAFE
    if(opt::num_threads > 1) {
        fprintf(stderr, "You enabled multi-threading but you do not have a threadsafe HDF5\n");
        fprintf(stderr, "Please recompile nanopolish's built-in libhdf5 or run with -t 1\n");
        exit(1);
    }
#endif

    // Initialize iteration
    std::vector<bam1_t*> records(opt::batch_size, NULL);
    for(size_t i = 0; i < records.size(); ++i) {
        records[i] = bam_init1();
    }

    int result;
    size_t num_reads_realigned = 0;
    size_t num_records_buffered = 0;

    do {
        assert(num_records_buffered < records.size());
        
        // read a record into the next slot in the buffer
        result = sam_itr_next(bam_fh, itr, records[num_records_buffered]);
        num_records_buffered += result >= 0;

        // realign if we've hit the max buffer size or reached the end of file
        if(num_records_buffered == records.size() || result < 0) {
            #pragma omp parallel for schedule(dynamic)
            for(size_t i = 0; i < num_records_buffered; ++i) {
                bam1_t* record = records[i];
                size_t read_idx = num_reads_realigned + i;
                if( (record->core.flag & BAM_FUNMAP) == 0) {

                    //load read
                    std::string read_name = bam_get_qname(record);
                    std::string fast5_path = name_map.get_path(read_name);
                    SquiggleRead sr(read_name, fast5_path);

                    // TODO: early exit when have processed all of the reads in readnames
                    if (!opt::readnames.empty() && 
                         std::find(opt::readnames.begin(), opt::readnames.end(), read_name) == opt::readnames.end() )
                            continue;

                    for(size_t strand_idx = 0; strand_idx < NUM_STRANDS; ++strand_idx) {
                        std::vector<EventAlignment> ao = alignment_from_read(sr, strand_idx, read_idx,
                                                                             models, fai, hdr,
                                                                             record, clip_start, clip_end);
                        if (ao.size() == 0)
                            continue;

                        // Update pore model based on alignment
                        if ( opt::calibrate ) 
                            recalibrate_model(sr, strand_idx, ao, false);

                        double score = model_score(sr, strand_idx, fai, ao, 500);
                        if (score > 0) 
                            continue;
                        #pragma omp critical(print)
                        std::cout << read_name << " " << ( strand_idx ? "complement" : "template" ) 
                                  << " " << sr.pore_model[strand_idx].name << " " << score << std::endl;
                    } 
                }
            }

            num_reads_realigned += num_records_buffered;
            num_records_buffered = 0;
        }

    } while(result >= 0);
    
    // cleanup records
    for(size_t i = 0; i < records.size(); ++i) {
        bam_destroy1(records[i]);
    }

    // cleanup
    sam_itr_destroy(itr);
    bam_hdr_destroy(hdr);
    fai_destroy(fai);
    sam_close(bam_fh);
    hts_idx_destroy(bam_idx);
    return 0;
}
Esempio n. 17
0
int main(int argc, char *argv[]) {
    int c;
    const char* normal_sample_id = _default_normal_sample_id;
    const char* tumor_sample_id = _default_tumor_sample_id;
    const char *fn_fa = 0;
    pu_data2_t *d = (pu_data2_t*)calloc(1, sizeof(pu_data2_t));
    d->min_somatic_qual=15;
    d->tid = -1; d->mask = BAM_DEF_MASK; d->mapQ = 0;
    d->c = sniper_maqcns_init();
    int use_priors = 1;
    d->include_loh = 1;
    d->include_gor = 1;
    d->use_joint_priors = 0;
    d->somatic_mutation_rate = 0.01;
    const char *output_format = "classic";

    while ((c = getopt(argc, argv, "n:t:vf:T:N:r:I:q:Q:pLGJs:F:")) >= 0) {
        switch (c) {
            case 'f': fn_fa = optarg; break;
            case 'T': d->c->theta = atof(optarg); break;
            case 'N': d->c->n_hap = atoi(optarg); break;
            case 'r': d->c->het_rate = atof(optarg); break;
            case 'q': d->mapQ = atoi(optarg); break;
            case 'Q': d->min_somatic_qual = atoi(optarg); break;
            case 'F': output_format = optarg; break;
            case 'p': use_priors = 0; break;
            case 'J': d->use_joint_priors = 1; break;
            case 's': d->somatic_mutation_rate = atof(optarg); d->use_joint_priors = 1; break;
            case 'v': version_info(); exit(0); break;
            case 't': tumor_sample_id = optarg; break;
            case 'n': normal_sample_id = optarg; break;
            case 'L': d->include_loh = 0; break;
            case 'G': d->include_gor = 0; break;
            default: fprintf(stderr, "Unrecognizd option '-%c'.\n", c); return 1;
        }
    }

    if (optind == argc) {
        usage(argv[0], d);
        sniper_maqcns_destroy(d->c); free(d);
        return 1;
    }
    if (fn_fa) {
        d->fai = fai_load(fn_fa);
    }
    else {
        fprintf(stderr, "You MUST specify a reference sequence. It isn't optional.\n");
        sniper_maqcns_destroy(d->c);
        free(d);
        exit(1);
    }
    if(d->use_joint_priors) {
        fprintf(stderr,"Using priors accounting for somatic mutation rate. Prior probability of a somatic mutation is %f\n",d->somatic_mutation_rate);
        make_joint_prior(d->somatic_mutation_rate);
    }

    sniper_maqcns_prepare(d->c);
    fprintf(stderr,"Preparing to snipe some somatics\n");
    if(use_priors) {
        fprintf(stderr,"Using prior probabilities\n");
        makeSoloPrior();
    }
    bamFile fp1, fp2;
    qAddTableInit();
    fp1 = (strcmp(argv[optind], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[optind], "r");
    fprintf(stderr, "Normal bam is %s\n", argv[optind+1]);
    fprintf(stderr, "Tumor bam is %s\n", argv[optind]);
    d->h1 = bam_header_read(fp1);
    sam_header_parse_rg(d->h1);
    fp2 = bam_open(argv[optind+1], "r");
    d->h2 = bam_header_read(fp2);
    sam_header_parse_rg(d->h2);
    FILE* snp_fh = fopen(argv[optind+2], "w");
    /* this will exit if the format name is invalid */
    output_formatter_t fmt = output_formatter_create(output_format, snp_fh);
    d->output_formatter = &fmt;
    if(snp_fh) {
        header_data_t hdr;
        hdr.refseq = fn_fa;
        hdr.normal_sample_id = normal_sample_id;
        hdr.tumor_sample_id = tumor_sample_id;
        d->output_formatter->header_fn(snp_fh, &hdr);
        bam_sspileup_file(fp1, fp2, d->mask, d->mapQ, glf_somatic, d, snp_fh);
    }
    else {
        fprintf(stderr, "Unable to open snp file!!!!!!!!!\n");
        exit(1);
    }

    bam_close(fp1);
    bam_close(fp2);
    bam_header_destroy(d->h1);
    bam_header_destroy(d->h2);
    if (d->fai) fai_destroy(d->fai);
    sniper_maqcns_destroy(d->c);
    free(d->ref); free(d);
    fclose(snp_fh);
    return 0;
}
void train_one_round(const Fast5Map& name_map, size_t round)
{
    const PoreModelMap& current_models = PoreModelSet::get_models(opt::trained_model_type);

    // Initialize the training summary stats for each kmer for each model
    ModelTrainingMap model_training_data;
    for(auto current_model_iter = current_models.begin(); current_model_iter != current_models.end(); current_model_iter++) {
        // one summary entry per kmer in the model
        std::vector<StateSummary> summaries(current_model_iter->second.get_num_states());
        model_training_data[current_model_iter->first] = summaries;
    }

    // Open the BAM and iterate over reads

    // load bam file
    htsFile* bam_fh = sam_open(opt::bam_file.c_str(), "r");
    assert(bam_fh != NULL);

    // load bam index file
    std::string index_filename = opt::bam_file + ".bai";
    hts_idx_t* bam_idx = bam_index_load(index_filename.c_str());
    assert(bam_idx != NULL);

    // read the bam header
    bam_hdr_t* hdr = sam_hdr_read(bam_fh);

    // load reference fai file
    faidx_t *fai = fai_load(opt::genome_file.c_str());

    hts_itr_t* itr;

    // If processing a region of the genome, only emit events aligned to this window
    int clip_start = -1;
    int clip_end = -1;

    if(opt::region.empty()) {
        // TODO: is this valid?
        itr = sam_itr_queryi(bam_idx, HTS_IDX_START, 0, 0);
    } else {
        fprintf(stderr, "Region: %s\n", opt::region.c_str());
        itr = sam_itr_querys(bam_idx, hdr, opt::region.c_str());
        hts_parse_reg(opt::region.c_str(), &clip_start, &clip_end);
    }

#ifndef H5_HAVE_THREADSAFE
    if(opt::num_threads > 1) {
        fprintf(stderr, "You enabled multi-threading but you do not have a threadsafe HDF5\n");
        fprintf(stderr, "Please recompile nanopolish's built-in libhdf5 or run with -t 1\n");
        exit(1);
    }
#endif

    // Initialize iteration
    std::vector<bam1_t*> records(opt::batch_size, NULL);
    for(size_t i = 0; i < records.size(); ++i) {
        records[i] = bam_init1();
    }

    int result;
    size_t num_reads_realigned = 0;
    size_t num_records_buffered = 0;
    Progress progress("[methyltrain]");

    do {
        assert(num_records_buffered < records.size());
        
        // read a record into the next slot in the buffer
        result = sam_itr_next(bam_fh, itr, records[num_records_buffered]);
        num_records_buffered += result >= 0;

        // realign if we've hit the max buffer size or reached the end of file
        if(num_records_buffered == records.size() || result < 0) {
            #pragma omp parallel for            
            for(size_t i = 0; i < num_records_buffered; ++i) {
                bam1_t* record = records[i];
                size_t read_idx = num_reads_realigned + i;
                if( (record->core.flag & BAM_FUNMAP) == 0) {
                    add_aligned_events(name_map, fai, hdr, record, read_idx, clip_start, clip_end, round, model_training_data);
                }
            }

            num_reads_realigned += num_records_buffered;
            num_records_buffered = 0;
        }

        if(opt::progress) {
            fprintf(stderr, "Realigned %zu reads in %.1lfs\r", num_reads_realigned, progress.get_elapsed_seconds());
        }
    } while(result >= 0);
    
    assert(num_records_buffered == 0);
    progress.end();
    
    // open the summary file
    std::stringstream summary_fn;
    summary_fn << "methyltrain" << opt::out_suffix << ".summary";
    FILE* summary_fp = fopen(summary_fn.str().c_str(), "w");
    fprintf(summary_fp, "model_short_name\tkmer\tnum_matches\tnum_skips\t"
                         "num_stays\tnum_events_for_training\twas_trained\t"
                         "trained_level_mean\ttrained_level_stdv\n");

    // open the tsv file with the raw training data
    std::stringstream training_fn;
    training_fn << "methyltrain" << opt::out_suffix << ".round" << round << ".events.tsv";
    std::ofstream training_ofs(training_fn.str());

    // write out a header for the training data
    StateTrainingData::write_header(training_ofs);

    // iterate over models: template, complement_pop1, complement_pop2
    for(auto model_training_iter = model_training_data.begin(); 
             model_training_iter != model_training_data.end(); model_training_iter++) {
        
        // Initialize the trained model from the input model
        auto current_model_iter = current_models.find(model_training_iter->first);
        assert(current_model_iter != current_models.end());

        std::string model_name = model_training_iter->first;
        std::string model_short_name = current_model_iter->second.metadata.get_short_name();
        
        // Initialize the new model from the current model
        PoreModel updated_model = current_model_iter->second;
        uint32_t k = updated_model.k;
        const std::vector<StateSummary>& summaries = model_training_iter->second;

        // Generate the complete set of kmers
        std::string gen_kmer(k, 'A');
        std::vector<std::string> all_kmers;
        for(size_t ki = 0; ki < summaries.size(); ++ki) {
            all_kmers.push_back(gen_kmer);
            mtrain_alphabet->lexicographic_next(gen_kmer);
        }
        assert(gen_kmer == std::string(k, 'A'));
        assert(all_kmers.front() == std::string(k, 'A'));
        assert(all_kmers.back() == std::string(k, 'T'));

        // Update means for each kmer
        #pragma omp parallel for
        for(size_t ki = 0; ki < summaries.size(); ++ki) {
            assert(ki < all_kmers.size());
            std::string kmer = all_kmers[ki];

            // write the observed values to a tsv file
            #pragma omp critical
            {
                for(size_t ei = 0; ei < summaries[ki].events.size(); ++ei) {
                    summaries[ki].events[ei].write_tsv(training_ofs, model_short_name, kmer);
                }

            }

            bool is_m_kmer = kmer.find('M') != std::string::npos;
            bool update_kmer = opt::training_target == TT_ALL_KMERS ||
                               (is_m_kmer && opt::training_target == TT_METHYLATED_KMERS) ||
                               (!is_m_kmer && opt::training_target == TT_UNMETHYLATED_KMERS);
            bool trained = false;
            // only train if there are a sufficient number of events for this kmer
            if(update_kmer && summaries[ki].events.size() >= opt::min_number_of_events_to_train) {
                
                // train a mixture model where a minority of k-mers aren't methylated
                ParamMixture mixture;
                
                float incomplete_methylation_rate = 0.05f;
                std::string um_kmer = mtrain_alphabet->unmethylate(kmer);
                size_t um_ki = mtrain_alphabet->kmer_rank(um_kmer.c_str(), k);
                
                // Initialize the training parameters. If this is a kmer containing
                // a methylation site we train a two component mixture, otherwise
                // just fit a gaussian
                float major_weight = is_m_kmer ? 1 - incomplete_methylation_rate : 1.0f;
                mixture.log_weights.push_back(log(major_weight));
                mixture.params.push_back(current_model_iter->second.get_parameters(ki));
                
                if(is_m_kmer) {
                    // add second unmethylated component
                    mixture.log_weights.push_back(std::log(incomplete_methylation_rate));
                    mixture.params.push_back(current_model_iter->second.get_parameters(um_ki));
                }

                if(opt::verbose > 1) {
                    fprintf(stderr, "INIT__MIX %s\t%s\t[%.2lf %.2lf %.2lf]\t[%.2lf %.2lf %.2lf]\n", model_training_iter->first.c_str(), kmer.c_str(), 
                        std::exp(mixture.log_weights[0]), mixture.params[0].level_mean, mixture.params[0].level_stdv,
                        std::exp(mixture.log_weights[1]), mixture.params[1].level_mean, mixture.params[1].level_stdv);
                }

                ParamMixture trained_mixture = train_gaussian_mixture(summaries[ki].events, mixture);

                if(opt::verbose > 1) {
                    fprintf(stderr, "TRAIN_MIX %s\t%s\t[%.2lf %.2lf %.2lf]\t[%.2lf %.2lf %.2lf]\n", model_training_iter->first.c_str(), kmer.c_str(), 
                        std::exp(trained_mixture.log_weights[0]), trained_mixture.params[0].level_mean, trained_mixture.params[0].level_stdv,
                        std::exp(trained_mixture.log_weights[1]), trained_mixture.params[1].level_mean, trained_mixture.params[1].level_stdv);
                }

                #pragma omp critical
                updated_model.states[ki] = trained_mixture.params[0];

                if (model_stdv()) {
                    ParamMixture ig_mixture;
                    // weights
                    ig_mixture.log_weights = trained_mixture.log_weights;
                    // states
                    ig_mixture.params.emplace_back(trained_mixture.params[0]);

                    if(is_m_kmer) {
                        ig_mixture.params.emplace_back(current_model_iter->second.get_parameters(um_ki));
                    }
                    // run training
                    auto trained_ig_mixture = train_invgaussian_mixture(summaries[ki].events, ig_mixture);

                    LOG("methyltrain", debug)
                        << "IG_INIT__MIX " << model_training_iter->first.c_str() << " " << kmer.c_str() << " ["
                        << std::fixed << std::setprecision(5) << ig_mixture.params[0].sd_mean << " "
                        << ig_mixture.params[1].sd_mean << "]" << std::endl
                        << "IG_TRAIN_MIX " << model_training_iter->first.c_str() << " " << kmer.c_str() << " ["
                        << trained_ig_mixture.params[0].sd_mean << " "
                        << trained_ig_mixture.params[1].sd_mean << "]" << std::endl;

                    // update state
                    #pragma omp critical
                    {
                        updated_model.states[ki] = trained_ig_mixture.params[0];
                    }
                }

                trained = true;
            }

            #pragma omp critical
            {
                fprintf(summary_fp, "%s\t%s\t%d\t%d\t%d\t%zu\t%d\t%.2lf\t%.2lf\n",
                                        model_short_name.c_str(), kmer.c_str(), 
                                        summaries[ki].num_matches, summaries[ki].num_skips, summaries[ki].num_stays, 
                                        summaries[ki].events.size(), trained, updated_model.states[ki].level_mean, updated_model.states[ki].level_stdv);
            }

            // add the updated model into the collection (or replace what is already there)
            PoreModelSet::insert_model(opt::trained_model_type, updated_model);
        }
    }

    // cleanup records
    for(size_t i = 0; i < records.size(); ++i) {
        bam_destroy1(records[i]);
    }

    // cleanup
    sam_itr_destroy(itr);
    bam_hdr_destroy(hdr);
    fai_destroy(fai);
    sam_close(bam_fh);
    hts_idx_destroy(bam_idx);
    fclose(summary_fp);
}
Esempio n. 19
0
int extract_main(int argc, char *argv[]) {
    char *opref = NULL, *oname, *p;
    int c, i;
    Config config;

    //Defaults
    config.keepCpG = 1;
    config.keepCHG = 0;
    config.keepCHH = 0;
    config.minMapq = 10;
    config.minPhred = 5;
    config.keepDupes = 0;
    config.keepSingleton = 0, config.keepDiscordant = 0;
    config.merge = 0;
    config.maxDepth = 2000;
    config.fai = NULL;
    config.fp = NULL;
    config.bai = NULL;
    config.reg = NULL;
    config.bedName = NULL;
    config.bed = NULL;
    config.fraction = 0;
    config.counts = 0;
    config.logit = 0;
    for(i=0; i<16; i++) config.bounds[i] = 0;

    static struct option lopts[] = {
        {"opref",        1, NULL, 'o'},
        {"fraction",     0, NULL, 'f'},
        {"counts",       0, NULL, 'c'},
        {"logit",        0, NULL, 'm'},
        {"noCpG",        0, NULL,   1},
        {"CHG",          0, NULL,   2},
        {"CHH",          0, NULL,   3},
        {"keepDupes",    0, NULL,   4},
        {"keepSingleton",0, NULL,   5},
        {"keepDiscordant",0,NULL,   6},
        {"OT",           1, NULL,   7},
        {"OB",           1, NULL,   8},
        {"CTOT",         1, NULL,   9},
        {"CTOB",         1, NULL,  10},
        {"mergeContext", 0, NULL,  11},
        {"help",         0, NULL, 'h'},
        {0,              0, NULL,   0}
    };
    while((c = getopt_long(argc, argv, "q:p:r:l:o:D:f:c:m:", lopts,NULL)) >=0) {
        switch(c) {
        case 'h' :
            extract_usage();
            return 0;
        case 'o' :
            opref = strdup(optarg);
            break;
        case 'D' :
            config.maxDepth = atoi(optarg);
            break;
        case 'r':
            config.reg = strdup(optarg);
            break;
        case 'l' :
            config.bedName = optarg;
            break;
        case 1 :
            config.keepCpG = 0;
            break;
        case 2 :
            config.keepCHG = 1;
            break;
        case 3 :
            config.keepCHH = 1;
            break;
        case 4 :
            config.keepDupes = 1;
            break;
        case 5 :
            config.keepSingleton = 1;
            break;
        case 6 :
            config.keepDiscordant = 1;
            break;
        case 7 :
            parseBounds(optarg, config.bounds, 0);
            break;
        case 8 :
            parseBounds(optarg, config.bounds, 1);
            break;
        case 9 :
            parseBounds(optarg, config.bounds, 2);
            break;
        case 10 :
            parseBounds(optarg, config.bounds, 3);
            break;
        case 11 :
            config.merge = 1;
            break;
        case 'q' :
            config.minMapq = atoi(optarg);
            break;
        case 'p' :
            config.minPhred = atoi(optarg);
            break;
        case 'm' :
            config.logit = 1;
            break;
        case 'f' :
            config.fraction = 1;
            break;
        case 'c' :
            config.counts = 1;
            break;
        case '?' :
        default :
            fprintf(stderr, "Invalid option '%c'\n", c);
            extract_usage();
            return 1;
        }
    }

    if(argc == 1) {
        extract_usage();
        return 0;
    }
    if(argc-optind != 2) {
        fprintf(stderr, "You must supply a reference genome in fasta format and an input BAM file!!!\n");
        extract_usage();
        return -1;
    }

    //Are the options reasonable?
    if(config.minPhred < 1) {
        fprintf(stderr, "-p %i is invalid. resetting to 1, which is the lowest possible value.\n", config.minPhred);
        config.minPhred = 1;
    }
    if(config.minMapq < 0) {
        fprintf(stderr, "-q %i is invalid. Resetting to 0, which is the lowest possible value.\n", config.minMapq);
        config.minMapq = 0;
    }
    if(config.fraction+config.counts+config.logit > 1) {
        fprintf(stderr, "More than one of --fraction, --counts, and --logit were specified. These are mutually exclusive.\n");
        extract_usage();
        return 1;
    }

    //Has more than one output format been requested?
    if(config.fraction + config.counts + config.logit > 1) {
        fprintf(stderr, "You may specify AT MOST one of -c/--counts, -f/--fraction, or -m/--logit.\n");
        return -6;
    }

    //Is there still a metric to output?
    if(!(config.keepCpG + config.keepCHG + config.keepCHH)) {
        fprintf(stderr, "You haven't specified any metrics to output!\nEither don't use the --noCpG option or specify --CHG and/or --CHH.\n");
        return -1;
    }

    //Open the files
    if((config.fai = fai_load(argv[optind])) == NULL) {
        fprintf(stderr, "Couldn't open the index for %s!\n", argv[optind]);
        extract_usage();
        return -2;
    }
    if((config.fp = hts_open(argv[optind+1], "rb")) == NULL) {
        fprintf(stderr, "Couldn't open %s for reading!\n", argv[optind+1]);
        return -4;
    }
    if((config.bai = sam_index_load(config.fp, argv[optind+1])) == NULL) {
        fprintf(stderr, "Couldn't load the index for %s, will attempt to build it.\n", argv[optind+1]);
        if(bam_index_build(argv[optind+1], 0) < 0) {
            fprintf(stderr, "Couldn't build the index for %s! File corrupted?\n", argv[optind+1]);
            return -5;
        }
        if((config.bai = sam_index_load(config.fp, argv[optind+1])) == NULL) {
            fprintf(stderr, "Still couldn't load the index, quiting.\n");
            return -5;
        }
    }

    //Output files
    config.output_fp = malloc(sizeof(FILE *) * 3);
    assert(config.output_fp);
    if(opref == NULL) {
        opref = strdup(argv[optind+1]);
        assert(opref);
        p = strrchr(opref, '.');
        if(p != NULL) *p = '\0';
        fprintf(stderr, "writing to prefix:'%s'\n", opref);
    }
    if(config.fraction) {
        oname = malloc(sizeof(char) * (strlen(opref)+19));
    } else if(config.counts) {
        oname = malloc(sizeof(char) * (strlen(opref)+21));
    } else if(config.logit) {
        oname = malloc(sizeof(char) * (strlen(opref)+20));
    } else {
        oname = malloc(sizeof(char) * (strlen(opref)+14));
    }
    assert(oname);
    if(config.keepCpG) {
        if(config.fraction) {
            sprintf(oname, "%s_CpG.meth.bedGraph", opref);
        } else if(config.counts) {
            sprintf(oname, "%s_CpG.counts.bedGraph", opref);
        } else if(config.logit) {
            sprintf(oname, "%s_CpG.logit.bedGraph", opref);
        } else {
            sprintf(oname, "%s_CpG.bedGraph", opref);
        }
        config.output_fp[0] = fopen(oname, "w");
        if(config.output_fp[0] == NULL) {
            fprintf(stderr, "Couldn't open the output CpG metrics file for writing! Insufficient permissions?\n");
            return -3;
        }
        printHeader(config.output_fp[0], "CpG", opref, config);
    }
    if(config.keepCHG) {
        if(config.fraction) {
            sprintf(oname, "%s_CHG.meth.bedGraph", opref);
        } else if(config.counts) {
            sprintf(oname, "%s_CHG.counts.bedGraph", opref);
        } else if(config.logit) {
            sprintf(oname, "%s_CHG.logit.bedGraph", opref);
        } else {
            sprintf(oname, "%s_CHG.bedGraph", opref);
        }
        config.output_fp[1] = fopen(oname, "w");
        if(config.output_fp[1] == NULL) {
            fprintf(stderr, "Couldn't open the output CHG metrics file for writing! Insufficient permissions?\n");
            return -3;
        }
        printHeader(config.output_fp[1], "CHG", opref, config);
    }
    if(config.keepCHH) {
        if(config.fraction) {
            sprintf(oname, "%s_CHH.meth.bedGraph", opref);
        } else if(config.counts) {
            sprintf(oname, "%s_CHH.counts.bedGraph", opref);
        } else if(config.logit) {
            sprintf(oname, "%s_CHH.logit.bedGraph", opref);
        } else {
            sprintf(oname, "%s_CHH.bedGraph", opref);
        }
        config.output_fp[2] = fopen(oname, "w");
        if(config.output_fp[2] == NULL) {
            fprintf(stderr, "Couldn't open the output CHH metrics file for writing! Insufficient permissions?\n");
            return -3;
        }
        printHeader(config.output_fp[2], "CHH", opref, config);
    }

    //Run the pileup
    extractCalls(&config);

    //Close things up
    hts_close(config.fp);
    fai_destroy(config.fai);
    if(config.keepCpG) fclose(config.output_fp[0]);
    if(config.keepCHG) fclose(config.output_fp[1]);
    if(config.keepCHH) fclose(config.output_fp[2]);
    hts_idx_destroy(config.bai);
    free(opref);
    if(config.reg) free(config.reg);
    if(config.bed) destroyBED(config.bed);
    free(oname);
    free(config.output_fp);

    return 0;
}
Esempio n. 20
0
int main_pad2unpad(int argc, char *argv[])
{
    samFile *in = 0, *out = 0;
    bam_hdr_t *h = 0, *h_fix = 0;
    faidx_t *fai = 0;
    int c, compress_level = -1, is_long_help = 0;
    char in_mode[5], out_mode[6], *fn_out = 0, *fn_list = 0;
    int ret=0;
    sam_global_args ga = SAM_GLOBAL_ARGS_INIT;

    static const struct option lopts[] = {
        SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 'T'),
        { NULL, 0, NULL, 0 }
    };

    /* parse command-line options */
    strcpy(in_mode, "r"); strcpy(out_mode, "w");
    while ((c = getopt_long(argc, argv, "SCso:u1T:?", lopts, NULL)) >= 0) {
        switch (c) {
        case 'S': break;
        case 'C': hts_parse_format(&ga.out, "cram"); break;
        case 's': assert(compress_level == -1); hts_parse_format(&ga.out, "sam"); break;
        case 'o': fn_out = strdup(optarg); break;
        case 'u':
            compress_level = 0;
            if (ga.out.format == unknown_format)
                hts_parse_format(&ga.out, "bam");
            break;
        case '1':
            compress_level = 1;
            if (ga.out.format == unknown_format)
                hts_parse_format(&ga.out, "bam");
            break;
        case '?': is_long_help = 1; break;
        default:  if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
            fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
            return usage(is_long_help);
        }
    }
    if (argc == optind) return usage(is_long_help);

    strcat(out_mode, "h");
    if (compress_level >= 0) {
        char tmp[2];
        tmp[0] = compress_level + '0'; tmp[1] = '\0';
        strcat(out_mode, tmp);
    }

    // Load FASTA reference (also needed for SAM -> BAM if missing header)
    if (ga.reference) {
        fn_list = samfaipath(ga.reference);
        fai = fai_load(ga.reference);
    }
    // open file handlers
    if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
        fprintf(stderr, "[depad] failed to open \"%s\" for reading.\n", argv[optind]);
        ret = 1;
        goto depad_end;
    }
    if (fn_list && hts_set_fai_filename(in, fn_list) != 0) {
        fprintf(stderr, "[depad] failed to load reference file \"%s\".\n", fn_list);
        ret = 1;
        goto depad_end;
    }
    if ((h = sam_hdr_read(in)) == 0) {
        fprintf(stderr, "[depad] failed to read the header from \"%s\".\n", argv[optind]);
        ret = 1;
        goto depad_end;
    }
    if (fai) {
        h_fix = fix_header(h, fai);
    } else {
        fprintf(stderr, "[depad] Warning - reference lengths will not be corrected without FASTA reference\n");
        h_fix = h;
    }
    char wmode[2];
    strcat(out_mode, sam_open_mode(wmode, fn_out, NULL)==0 ? wmode : "b");
    if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
        fprintf(stderr, "[depad] failed to open \"%s\" for writing.\n", fn_out? fn_out : "standard output");
        ret = 1;
        goto depad_end;
    }

    // Reference-based CRAM won't work unless we also create a new reference.
    // We could embed this, but for now we take the easy option.
    if (ga.out.format == cram)
        hts_set_opt(out, CRAM_OPT_NO_REF, 1);

    if (sam_hdr_write(out, h_fix) != 0) {
        fprintf(stderr, "[depad] failed to write header.\n");
        ret = 1;
        goto depad_end;
    }

    // Do the depad
    ret = bam_pad2unpad(in, out, h, fai);

depad_end:
    // close files, free and return
    if (fai) fai_destroy(fai);
    if (h) bam_hdr_destroy(h);
    sam_close(in);
    sam_close(out);
    free(fn_list); free(fn_out);
    return ret;
}
Esempio n. 21
0
File: test_realn.c Progetto: atks/vt
int main(int argc, char **argv) {
    htsFile *in = NULL;
    htsFile *out = NULL;
    char *in_name = "-";
    char *out_name = "-";
    char *ref_name = NULL;
    char *ref_seq = NULL;
    char modew[8] = "w";
    faidx_t *fai = NULL;
    bam_hdr_t *hdr = NULL;
    bam1_t *rec = NULL;
    int c, res, last_ref = -1, ref_len = 0;
    int adjust = 0, extended = 0, recalc = 0, flags = 0;

    while ((c = getopt(argc, argv, "aef:hi:o:r")) >= 0) {
        switch (c) {
        case 'a': adjust = 1; break;
        case 'e': extended = 1; break;
        case 'f': ref_name = optarg; break;
        case 'h': usage(argv[0]); return EXIT_SUCCESS;
        case 'i': in_name = optarg; break;
        case 'o': out_name = optarg; break;
        case 'r': recalc = 1; break;
        default: usage(argv[0]); return EXIT_FAILURE;
        }
    }

    if (!ref_name) {
        usage(argv[0]);
        return EXIT_FAILURE;
    }

    flags = (adjust ? 1 : 0) | (extended ? 2 : 0) | (recalc ? 4 : 0);

    fai = fai_load(ref_name);
    if (!fai) {
        fprintf(stderr, "Couldn't load reference %s\n", ref_name);
        goto fail;
    }

    rec = bam_init1();
    if (!rec) {
        perror(NULL);
        goto fail;
    }

    in = hts_open(in_name, "r");
    if (!in) {
        fprintf(stderr, "Couldn't open %s : %s\n", in_name, strerror(errno));
        goto fail;
    }

    hdr = sam_hdr_read(in);
    if (!hdr) {
        fprintf(stderr, "Couldn't read header for %s\n", in_name);
        goto fail;
    }

    out = hts_open(out_name, modew);
    if (!out) {
        fprintf(stderr, "Couldn't open %s : %s\n", out_name, strerror(errno));
        goto fail;
    }

    if (sam_hdr_write(out, hdr) < 0) {
        fprintf(stderr, "Couldn't write header to %s : %s\n",
                out_name, strerror(errno));
        goto fail;
    }

    while ((res = sam_read1(in, hdr, rec)) >= 0) {
        if (rec->core.tid >= hdr->n_targets) {
            fprintf(stderr, "Invalid BAM reference id %d\n", rec->core.tid);
            goto fail;
        }
        if (last_ref != rec->core.tid && rec->core.tid >= 0) {
            free(ref_seq);
            ref_seq = faidx_fetch_seq(fai, hdr->target_name[rec->core.tid],
                                      0, INT_MAX, &ref_len);
            if (!ref_seq) {
                fprintf(stderr, "Couldn't get reference %s\n",
                        hdr->target_name[rec->core.tid]);
                goto fail;
            }
            last_ref = rec->core.tid;
        }
        if (rec->core.tid >= 0) {
            res = sam_prob_realn(rec, ref_seq, ref_len, flags);
            if (res <= -4) {
                fprintf(stderr, "Error running sam_prob_realn : %s\n",
                        strerror(errno));
                goto fail;
            }
        }
        if (sam_write1(out, hdr, rec) < 0) {
            fprintf(stderr, "Error writing to %s\n", out_name);
            goto fail;
        }
    }
    res = hts_close(in);
    in = NULL;
    if (res < 0) {
        fprintf(stderr, "Error closing %s\n", in_name);
        goto fail;
    }

    res = hts_close(out);
    out = NULL;
    if (res < 0) {
        fprintf(stderr, "Error closing %s\n", out_name);
        goto fail;
    }

    bam_hdr_destroy(hdr);
    bam_destroy1(rec);
    free(ref_seq);
    fai_destroy(fai);

    return EXIT_SUCCESS;

 fail:
    if (hdr) bam_hdr_destroy(hdr);
    if (rec) bam_destroy1(rec);
    if (in) hts_close(in);
    if (out) hts_close(out);
    free(ref_seq);
    fai_destroy(fai);
    return EXIT_FAILURE;
}
Esempio n. 22
0
int main(int argc, const char *argv[])
{
    //
    // Parse parameters
    //
    Parameters parameters = Parameters(argc, argv);

    //
    // Open log file
    //
    FILE* logFile = fopen((parameters.outFilePrefix + ".log").c_str(), "w");
    Output2FILE::Stream() = logFile;
    LOG(logINFO) << "HapMuC ver1.1" << std::endl;

    //
    // Open reference genome
    //
    faidx_t *fai = NULL;
    fai = fai_load(parameters.refFileName.c_str());
    if (!fai) {
        LOG(logERROR) << "Cannot open reference sequence file." << std::endl;
        exit(1);
    }

    //
    // Prepare bam files
    //
    MyBam tumorBam = MyBam(parameters.tumorBam);
    MyBam normalBam = MyBam(parameters.normalBam);
    BamReader tumorBamReader = BamReader(&tumorBam, fai, parameters.maxReads);
    BamReader normalBamReader = BamReader(&normalBam, fai, parameters.maxReads);

    //
    // Prepare main algorithm
    //
    MutationCaller mutationCaller = MutationCaller(tumorBamReader, normalBamReader,
                                                   parameters, fai);

    //
    // Open input file and output file
    //
    std::ifstream inputWindowStream(parameters.windowFile.c_str());
    std::ofstream outStream((parameters.outFilePrefix + ".calls.txt").c_str());

    //
    // Output column names
    //
    outStream << MutationCallResult::getHeader() << std::endl;

    std::string line;
    while (getline(inputWindowStream, line)) {
        //
        // Parse a candidate window
        //
        CandidateWindow window = CandidateWindow(line);

        LOG(logINFO) << "************************* target: " <<
        window.info.chr << " " << window.info.start <<
        " " << window.target << " *************************" << std::endl;

        try {
            //
            // Evaluate whether the candidate somatic mutation exists or not
            // by calculating Bayes factor
            //
            MutationCallResult result = mutationCaller.call(window);
            
            //
            // Output the result
            //
            outStream << result.getOutput() << std::endl;
            LOG(logDEBUG) << result.getOutput() << std::endl;
            
        } catch (std::string &s) {
            LOG(logERROR) << "something unexpected happened. exit." << std::endl;
            LOG(logERROR) << s << std::endl;
            exit(1);
        }
    }

    outStream.close();
    inputWindowStream.close();
    fai_destroy(fai);

    return 0;
}
int methyltest_main(int argc, char** argv)
{
    parse_methyltest_options(argc, argv);
    omp_set_num_threads(opt::num_threads);

    Fast5Map name_map(opt::reads_file);
    ModelMap models = read_models_fofn(opt::models_fofn, mtest_alphabet);
    
    // Open the BAM and iterate over reads

    // load bam file
    htsFile* bam_fh = sam_open(opt::bam_file.c_str(), "r");
    assert(bam_fh != NULL);

    // load bam index file
    std::string index_filename = opt::bam_file + ".bai";
    hts_idx_t* bam_idx = bam_index_load(index_filename.c_str());
    assert(bam_idx != NULL);

    // read the bam header
    bam_hdr_t* hdr = sam_hdr_read(bam_fh);
    
    // load reference fai file
    faidx_t *fai = fai_load(opt::genome_file.c_str());

    hts_itr_t* itr;

    // If processing a region of the genome, only emit events aligned to this window
    int clip_start = -1;
    int clip_end = -1;

    if(opt::region.empty()) {
        // TODO: is this valid?
        itr = sam_itr_queryi(bam_idx, HTS_IDX_START, 0, 0);
    } else {

        fprintf(stderr, "Region: %s\n", opt::region.c_str());
        itr = sam_itr_querys(bam_idx, hdr, opt::region.c_str());
        hts_parse_reg(opt::region.c_str(), &clip_start, &clip_end);
    }

#ifndef H5_HAVE_THREADSAFE
    if(opt::num_threads > 1) {
        fprintf(stderr, "You enabled multi-threading but you do not have a threadsafe HDF5\n");
        fprintf(stderr, "Please recompile nanopolish's built-in libhdf5 or run with -t 1\n");
        exit(1);
    }
#endif

    // Initialize writers
    OutputHandles handles;
    handles.site_writer = fopen(std::string(opt::bam_file + ".methyltest.sites.bed").c_str(), "w");
    handles.read_writer = fopen(std::string(opt::bam_file + ".methyltest.reads.tsv").c_str(), "w");
    handles.strand_writer = fopen(std::string(opt::bam_file + ".methyltest.strand.tsv").c_str(), "w");

    // Write a header to the reads.tsv file
    fprintf(handles.read_writer, "name\tsum_ll_ratio\tn_cpg\tcomplement_model\ttags\n");
    
    // strand header
    fprintf(handles.strand_writer, "name\tsum_ll_ratio\tn_cpg\tmodel\n");


    // Initialize iteration
    std::vector<bam1_t*> records(opt::batch_size, NULL);
    for(size_t i = 0; i < records.size(); ++i) {
        records[i] = bam_init1();
    }

    int result;
    size_t num_reads_processed = 0;
    size_t num_records_buffered = 0;
    Progress progress("[methyltest]");

    do {
        assert(num_records_buffered < records.size());
        
        // read a record into the next slot in the buffer
        result = sam_itr_next(bam_fh, itr, records[num_records_buffered]);
        num_records_buffered += result >= 0;

        // realign if we've hit the max buffer size or reached the end of file
        if(num_records_buffered == records.size() || result < 0) {
            
            #pragma omp parallel for
            for(size_t i = 0; i < num_records_buffered; ++i) {
                bam1_t* record = records[i];
                size_t read_idx = num_reads_processed + i;
                if( (record->core.flag & BAM_FUNMAP) == 0) {
                    calculate_methylation_for_read(models, name_map, fai, hdr, record, read_idx, handles);
                }
            }

            num_reads_processed += num_records_buffered;
            num_records_buffered = 0;

        }
    } while(result >= 0);
    
    assert(num_records_buffered == 0);
    progress.end();

    // cleanup records
    for(size_t i = 0; i < records.size(); ++i) {
        bam_destroy1(records[i]);
    }

    // cleanup
    fclose(handles.site_writer);
    fclose(handles.read_writer);
    fclose(handles.strand_writer);

    sam_itr_destroy(itr);
    bam_hdr_destroy(hdr);
    fai_destroy(fai);
    sam_close(bam_fh);
    hts_idx_destroy(bam_idx);
    
    return EXIT_SUCCESS;
}
Esempio n. 24
0
int bam_mpileup(int argc, char *argv[])
{
	int c;
    const char *file_list = NULL;
    char **fn = NULL;
    int nfiles = 0, use_orphan = 0;
	mplp_conf_t mplp;
	memset(&mplp, 0, sizeof(mplp_conf_t));
	#define MPLP_PRINT_POS 0x4000
	mplp.max_mq = 60;
	mplp.min_baseQ = 13;
	mplp.capQ_thres = 0;
	mplp.max_depth = 250; mplp.max_indel_depth = 250;
	mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
	mplp.min_frac = 0.002; mplp.min_support = 1;
	mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
	while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6Os")) >= 0) {
		switch (c) {
		case 'f':
			mplp.fai = fai_load(optarg);
			if (mplp.fai == 0) return 1;
			break;
		case 'd': mplp.max_depth = atoi(optarg); break;
		case 'r': mplp.reg = strdup(optarg); break;
		case 'l': mplp.bed = bed_read(optarg); break;
		case 'P': mplp.pl_list = strdup(optarg); break;
		case 'g': mplp.flag |= MPLP_GLF; break;
		case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break;
		case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break;
		case 'B': mplp.flag &= ~MPLP_REALN; break;
		case 'D': mplp.flag |= MPLP_FMT_DP; break;
		case 'S': mplp.flag |= MPLP_FMT_SP; break;
		case 'I': mplp.flag |= MPLP_NO_INDEL; break;
		case 'E': mplp.flag |= MPLP_EXT_BAQ; break;
		case '6': mplp.flag |= MPLP_ILLUMINA13; break;
		case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
		case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
		case 'O': mplp.flag |= MPLP_PRINT_POS; break;
		case 'C': mplp.capQ_thres = atoi(optarg); break;
		case 'M': mplp.max_mq = atoi(optarg); break;
		case 'q': mplp.min_mq = atoi(optarg); break;
		case 'Q': mplp.min_baseQ = atoi(optarg); break;
        case 'b': file_list = optarg; break;
		case 'o': mplp.openQ = atoi(optarg); break;
		case 'e': mplp.extQ = atoi(optarg); break;
		case 'h': mplp.tandemQ = atoi(optarg); break;
		case 'A': use_orphan = 1; break;
		case 'F': mplp.min_frac = atof(optarg); break;
		case 'm': mplp.min_support = atoi(optarg); break;
		case 'L': mplp.max_indel_depth = atoi(optarg); break;
		case 'G': {
				FILE *fp_rg;
				char buf[1024];
				mplp.rghash = bcf_str2id_init();
				if ((fp_rg = fopen(optarg, "r")) == 0)
					fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
				while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
					bcf_str2id_add(mplp.rghash, strdup(buf));
				fclose(fp_rg);
			}
			break;
		}
	}
	if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
	if (argc == 1) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n");
		fprintf(stderr, "Input options:\n\n");
		fprintf(stderr, "       -6           assume the quality is in the Illumina-1.3+ encoding\n");
		fprintf(stderr, "       -A           count anomalous read pairs\n");
		fprintf(stderr, "       -B           disable BAQ computation\n");
		fprintf(stderr, "       -b FILE      list of input BAM files [null]\n");
		fprintf(stderr, "       -C INT       parameter for adjusting mapQ; 0 to disable [0]\n");
		fprintf(stderr, "       -d INT       max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth);
		fprintf(stderr, "       -E           extended BAQ for higher sensitivity but lower specificity\n");
		fprintf(stderr, "       -f FILE      faidx indexed reference sequence file [null]\n");
		fprintf(stderr, "       -G FILE      exclude read groups listed in FILE [null]\n");
		fprintf(stderr, "       -l FILE      list of positions (chr pos) or regions (BED) [null]\n");
		fprintf(stderr, "       -M INT       cap mapping quality at INT [%d]\n", mplp.max_mq);
		fprintf(stderr, "       -r STR       region in which pileup is generated [null]\n");
		fprintf(stderr, "       -R           ignore RG tags\n");
		fprintf(stderr, "       -q INT       skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq);
		fprintf(stderr, "       -Q INT       skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ);
		fprintf(stderr, "\nOutput options:\n\n");
		fprintf(stderr, "       -D           output per-sample DP in BCF (require -g/-u)\n");
		fprintf(stderr, "       -g           generate BCF output (genotype likelihoods)\n");
		fprintf(stderr, "       -O           output base positions on reads (disabled by -g/-u)\n");
		fprintf(stderr, "       -s           output mapping quality (disabled by -g/-u)\n");
		fprintf(stderr, "       -S           output per-sample strand bias P-value in BCF (require -g/-u)\n");
		fprintf(stderr, "       -u           generate uncompress BCF output\n");
		fprintf(stderr, "\nSNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):\n\n");
		fprintf(stderr, "       -e INT       Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ);
		fprintf(stderr, "       -F FLOAT     minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac);
		fprintf(stderr, "       -h INT       coefficient for homopolymer errors [%d]\n", mplp.tandemQ);
		fprintf(stderr, "       -I           do not perform indel calling\n");
		fprintf(stderr, "       -L INT       max per-sample depth for INDEL calling [%d]\n", mplp.max_indel_depth);
		fprintf(stderr, "       -m INT       minimum gapped reads for indel candidates [%d]\n", mplp.min_support);
		fprintf(stderr, "       -o INT       Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ);
		fprintf(stderr, "       -P STR       comma separated list of platforms for indels [all]\n");
		fprintf(stderr, "\n");
		fprintf(stderr, "Notes: Assuming diploid individuals.\n\n");
		return 1;
	}
    if (file_list) {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        mpileup(&mplp,nfiles,fn);
        for (c=0; c<nfiles; c++) free(fn[c]);
        free(fn);
    } else mpileup(&mplp, argc - optind, argv + optind);
	if (mplp.rghash) bcf_str2id_thorough_destroy(mplp.rghash);
	free(mplp.reg); free(mplp.pl_list);
	if (mplp.fai) fai_destroy(mplp.fai);
	if (mplp.bed) bed_destroy(mplp.bed);
	return 0;
}
Esempio n. 25
0
int bam_mpileup(int argc, char *argv[])
{
    int c;
    const char *file_list = NULL;
    char **fn = NULL;
    int nfiles = 0, use_orphan = 0;
    mplp_conf_t mplp;
    memset(&mplp, 0, sizeof(mplp_conf_t));
    mplp.min_baseQ = 13;
    mplp.capQ_thres = 0;
    mplp.max_depth = 250; mplp.max_indel_depth = 250;
    mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
    mplp.min_frac = 0.002; mplp.min_support = 1;
    mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN | MPLP_SMART_OVERLAPS;
    mplp.argc = argc; mplp.argv = argv;
    mplp.rflag_filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
    mplp.output_fname = NULL;
    static const struct option lopts[] =
    {
        {"rf", required_argument, NULL, 1},   // require flag
        {"ff", required_argument, NULL, 2},   // filter flag
        {"incl-flags", required_argument, NULL, 1},
        {"excl-flags", required_argument, NULL, 2},
        {"output", required_argument, NULL, 3},
        {"open-prob", required_argument, NULL, 4},
        {"illumina1.3+", no_argument, NULL, '6'},
        {"count-orphans", no_argument, NULL, 'A'},
        {"bam-list", required_argument, NULL, 'b'},
        {"no-BAQ", no_argument, NULL, 'B'},
        {"no-baq", no_argument, NULL, 'B'},
        {"adjust-MQ", required_argument, NULL, 'C'},
        {"adjust-mq", required_argument, NULL, 'C'},
        {"max-depth", required_argument, NULL, 'd'},
        {"redo-BAQ", no_argument, NULL, 'E'},
        {"redo-baq", no_argument, NULL, 'E'},
        {"fasta-ref", required_argument, NULL, 'f'},
        {"exclude-RG", required_argument, NULL, 'G'},
        {"exclude-rg", required_argument, NULL, 'G'},
        {"positions", required_argument, NULL, 'l'},
        {"region", required_argument, NULL, 'r'},
        {"ignore-RG", no_argument, NULL, 'R'},
        {"ignore-rg", no_argument, NULL, 'R'},
        {"min-MQ", required_argument, NULL, 'q'},
        {"min-mq", required_argument, NULL, 'q'},
        {"min-BQ", required_argument, NULL, 'Q'},
        {"min-bq", required_argument, NULL, 'Q'},
        {"ignore-overlaps", no_argument, NULL, 'x'},
        {"BCF", no_argument, NULL, 'g'},
        {"bcf", no_argument, NULL, 'g'},
        {"VCF", no_argument, NULL, 'v'},
        {"vcf", no_argument, NULL, 'v'},
        {"output-BP", no_argument, NULL, 'O'},
        {"output-bp", no_argument, NULL, 'O'},
        {"output-MQ", no_argument, NULL, 's'},
        {"output-mq", no_argument, NULL, 's'},
        {"output-tags", required_argument, NULL, 't'},
        {"uncompressed", no_argument, NULL, 'u'},
        {"ext-prob", required_argument, NULL, 'e'},
        {"gap-frac", required_argument, NULL, 'F'},
        {"tandem-qual", required_argument, NULL, 'h'},
        {"skip-indels", no_argument, NULL, 'I'},
        {"max-idepth", required_argument, NULL, 'L'},
        {"min-ireads ", required_argument, NULL, 'm'},
        {"per-sample-mF", no_argument, NULL, 'p'},
        {"per-sample-mf", no_argument, NULL, 'p'},
        {"platforms", required_argument, NULL, 'P'},
        {NULL, 0, NULL, 0}
    };
    while ((c = getopt_long(argc, argv, "Agf:r:l:q:Q:uRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsVvxt:",lopts,NULL)) >= 0) {
        switch (c) {
        case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
        case  1 :
            mplp.rflag_require = bam_str2flag(optarg);
            if ( mplp.rflag_require<0 ) { fprintf(stderr,"Could not parse --rf %s\n", optarg); return 1; }
            break;
        case  2 :
            mplp.rflag_filter = bam_str2flag(optarg);
            if ( mplp.rflag_filter<0 ) { fprintf(stderr,"Could not parse --ff %s\n", optarg); return 1; }
            break;
        case  3 : mplp.output_fname = optarg; break;
        case  4 : mplp.openQ = atoi(optarg); break;
        case 'f':
            mplp.fai = fai_load(optarg);
            if (mplp.fai == 0) return 1;
            mplp.fai_fname = optarg;
            break;
        case 'd': mplp.max_depth = atoi(optarg); break;
        case 'r': mplp.reg = strdup(optarg); break;
        case 'l':
                  // In the original version the whole BAM was streamed which is inefficient
                  //  with few BED intervals and big BAMs. Todo: devise a heuristic to determine
                  //  best strategy, that is streaming or jumping.
                  mplp.bed = bed_read(optarg);
                  if (!mplp.bed) { print_error_errno("Could not read file \"%s\"", optarg); return 1; }
                  break;
        case 'P': mplp.pl_list = strdup(optarg); break;
        case 'p': mplp.flag |= MPLP_PER_SAMPLE; break;
        case 'g': mplp.flag |= MPLP_BCF; break;
        case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break;
        case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break;
        case 'B': mplp.flag &= ~MPLP_REALN; break;
        case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break;
        case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break;
        case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break;
        case 'I': mplp.flag |= MPLP_NO_INDEL; break;
        case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
        case '6': mplp.flag |= MPLP_ILLUMINA13; break;
        case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
        case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
        case 'O': mplp.flag |= MPLP_PRINT_POS; break;
        case 'C': mplp.capQ_thres = atoi(optarg); break;
        case 'q': mplp.min_mq = atoi(optarg); break;
        case 'Q': mplp.min_baseQ = atoi(optarg); break;
        case 'b': file_list = optarg; break;
        case 'o': {
                char *end;
                long value = strtol(optarg, &end, 10);
                // Distinguish between -o INT and -o FILE (a bit of a hack!)
                if (*end == '\0') mplp.openQ = value;
                else mplp.output_fname = optarg;
            }
            break;
        case 'e': mplp.extQ = atoi(optarg); break;
        case 'h': mplp.tandemQ = atoi(optarg); break;
        case 'A': use_orphan = 1; break;
        case 'F': mplp.min_frac = atof(optarg); break;
        case 'm': mplp.min_support = atoi(optarg); break;
        case 'L': mplp.max_indel_depth = atoi(optarg); break;
        case 'G': {
                FILE *fp_rg;
                char buf[1024];
                mplp.rghash = khash_str2int_init();
                if ((fp_rg = fopen(optarg, "r")) == 0)
                    fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
                while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
                    khash_str2int_inc(mplp.rghash, strdup(buf));
                fclose(fp_rg);
            }
            break;
        case 't': mplp.fmt_flag |= parse_format_flag(optarg); break;
        default:
            fprintf(stderr,"Invalid option: '%c'\n", c);
            return 1;
        }
    }
    if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
    {
        fprintf(stderr,"Error: The -B option cannot be combined with -E\n");
        return 1;
    }
    if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
    if (argc == 1)
    {
        print_usage(stderr, &mplp);
        return 1;
    }
    int ret;
    if (file_list) {
        if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
        ret = mpileup(&mplp,nfiles,fn);
        for (c=0; c<nfiles; c++) free(fn[c]);
        free(fn);
    }
    else
        ret = mpileup(&mplp, argc - optind, argv + optind);
    if (mplp.rghash) khash_str2int_destroy_free(mplp.rghash);
    free(mplp.reg); free(mplp.pl_list);
    if (mplp.fai) fai_destroy(mplp.fai);
    if (mplp.bed) bed_destroy(mplp.bed);
    return ret;
}