Ejemplo n.º 1
0
static void destroy_data(args_t *args)
{
    if ( args->prefix )
    {
        fclose(args->fh_log);
        int i, n = args->isec_op==OP_VENN ? 3 : args->files->nreaders;
        for (i=0; i<n; i++)
        {
            if ( !args->fnames[i] ) continue;
            hts_close(args->fh_out[i]);
            if ( args->output_type==FT_VCF_GZ )
            {
                tbx_conf_t conf = tbx_conf_vcf;
                tbx_index_build(args->fnames[i], -1, &conf);
            }
            else if ( args->output_type==FT_BCF_GZ )
            {
                if ( bcf_index_build(args->fnames[i],14) ) error("Could not index %s\n", args->fnames[i]);
            }
            free(args->fnames[i]);
        }
        free(args->fh_out);
        free(args->fnames);
        if ( args->fh_sites ) fclose(args->fh_sites);
        if ( args->write ) free(args->write);
    }
}
Ejemplo n.º 2
0
int main_bcfidx(int argc, char *argv[])
{
    int c, min_shift = 14;
    while ((c = getopt(argc, argv, "s:")) >= 0)
        if (c == 's') min_shift = atoi(optarg);
    if (optind == argc) {
        fprintf(stderr, "Usage: bcfidx [-s minShift] <in.bcf>\n");
        return 1;
    }
    bcf_index_build(argv[optind], min_shift);
    return 0;
}
Ejemplo n.º 3
0
int main_vcfindex(int argc, char *argv[])
{
    int c, force = 0, tbi = 0, stats = 0;
    int min_shift = BCF_LIDX_SHIFT;

    static struct option loptions[] =
    {
        {"csi",no_argument,NULL,'c'},
        {"tbi",no_argument,NULL,'t'},
        {"force",no_argument,NULL,'f'},
        {"min-shift",required_argument,NULL,'m'},
        {"stats",no_argument,NULL,'s'},
        {"nrecords",no_argument,NULL,'n'},
        {NULL, 0, NULL, 0}
    };

    char *tmp;
    while ((c = getopt_long(argc, argv, "ctfm:sn", loptions, NULL)) >= 0)
    {
        switch (c)
        {
            case 'c': tbi = 0; break;
            case 't': tbi = 1; min_shift = 0; break;
            case 'f': force = 1; break;
            case 'm': 
                min_shift = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: --min-shift %s\n", optarg);
                break;
            case 's': stats |= 1; break;
            case 'n': stats |= 2; break;
            default: usage();
        }
    }
    if ( optind==argc ) usage();
    if (stats>2)
    {
        fprintf(stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
        return 1;
    }
    if (tbi && min_shift>0)
    {
        fprintf(stderr, "[E::%s] min-shift option only expected for CSI indices \n", __func__);
        return 1;
    }
    if (min_shift < 0 || min_shift > 30)
    {
        fprintf(stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift);
        return 1;
    }

    char *fname = argv[optind];
    if (stats) return vcf_index_stats(fname, stats);

    htsFile *fp = hts_open(fname,"r"); 
    if ( !fp ) error("Failed to read %s\n", fname);
    htsFormat type = *hts_get_format(fp);
    hts_close(fp);

    if ( (type.format!=bcf && type.format!=vcf) || type.compression!=bgzf )
    {
        fprintf(stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
        if ( type.compression!=bgzf )
            fprintf(stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
        return 1;
    }
    if (tbi && type.format==bcf)
    {
        fprintf(stderr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
        tbi = 0; min_shift = BCF_LIDX_SHIFT;
    }
    if (min_shift == 0 && type.format==bcf)
    {
        fprintf(stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
        return 1;
    }
    if (!tbi && type.format==vcf && min_shift == 0)
    {
        fprintf(stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
        tbi = 1;
    }

    if (!force)
    {
        // Before complaining about existing index, check if the VCF file isn't newer.
        char *idx_fname = (char*)alloca(strlen(fname) + 5);
        strcat(strcpy(idx_fname, fname), tbi ? ".tbi" : ".csi");
        struct stat stat_tbi, stat_file;
        if ( stat(idx_fname, &stat_tbi)==0 )
        {
            stat(fname, &stat_file);
            if ( stat_file.st_mtime <= stat_tbi.st_mtime )
            {
                fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
                return 1;
            }
        }
    }

    if (type.format==bcf)
    {
        if ( bcf_index_build(fname, min_shift) != 0 )
        {
            fprintf(stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
            return 1;
        }
    }
    else
    {
        if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 )
        {
            fprintf(stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
            return 1;
        }
    }
    return 0;
}
Ejemplo n.º 4
0
int ingest1(const char *input,const char *output,char *ref,bool exit_on_mismatch=true) {
  cerr << "Input: " << input << "\tOutput: "<<output<<endl;

  kstream_t *ks;
  kstring_t str = {0,0,0};    
  gzFile fp = gzopen(input, "r");
  VarBuffer vbuf(1000);
  int prev_rid = -1;
  if(fp==NULL) {
    fprintf(stderr,"problem opening %s\n",input);
    exit(1);
  }

  char *out_fname = (char *)malloc(strlen(output)+5);
  strcpy(out_fname,output);
  strcat(out_fname,".tmp");
  if(fileexists(out_fname)) {
    fprintf(stderr,"%s file already exists. will not overwrite\n",out_fname);
    exit(1);
  }
  printf("depth: %s\n",out_fname);
  gzFile depth_fp = gzopen(out_fname, "wb1");
  strcpy(out_fname,output);
  strcat(out_fname,".bcf");
  if(fileexists(out_fname)) {
    fprintf(stderr,"%s file already exists. will not overwrite\n",out_fname);
    exit(1);
  }
  printf("variants: %s\n",out_fname);
  htsFile *variant_fp=hts_open(out_fname,"wb1");
  if(variant_fp==NULL) {
    fprintf(stderr,"problem opening %s\n",input);
    exit(1);    
  }

  ks = ks_init(fp);
  htsFile *hfp=hts_open(input, "r");
  bcf_hdr_t *hdr_in =  bcf_hdr_read(hfp);
  hts_close(hfp);
  //this is a hack to fix gvcfs where AD is incorrectly defined in the header. (vcf4.2 does not technically allow Number=R)
  bcf_hdr_remove(hdr_in,BCF_HL_FMT,"AD");
  assert(  bcf_hdr_append(hdr_in,"##FORMAT=<ID=AD,Number=R,Type=Integer,Description=\"Allelic depths for the ref and alt alleles in the order listed. For indels this value only includes reads which confidently support each allele (posterior prob 0.999 or higher that read contains indicated allele vs all other intersecting indel alleles)\">") == 0);

  //this is a hack to fix broken gvcfs where GQ is incorrectly labelled as float (v4.3 spec says it should be integer)
  bcf_hdr_remove(hdr_in,BCF_HL_FMT,"GQ");
  assert(  bcf_hdr_append(hdr_in,"##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">") == 0);


  //  bcf_hdr_t  *hdr_out=hdr_in;
  bcf_hdr_t *hdr_out =  bcf_hdr_dup(hdr_in);
  remove_hdr_lines(hdr_out,BCF_HL_INFO);
  remove_hdr_lines(hdr_out,BCF_HL_FLT);
  bcf_hdr_sync(hdr_out);

  //here we add FORMAT/PF. which is the pass filter flag for alts.
  assert(  bcf_hdr_append(hdr_out,"##FORMAT=<ID=PF,Number=A,Type=Integer,Description=\"variant was PASS filter in original sample gvcf\">") == 0);

  args_t *norm_args = init_vcfnorm(hdr_out,ref);
  norm_args->check_ref |= CHECK_REF_WARN;
  bcf1_t *bcf_rec = bcf_init();
  bcf_hdr_write(variant_fp, hdr_out);
  kstring_t work1 = {0,0,0};            
  int buf[5];
  ks_tokaux_t aux;
  int ndec=0;
  int ref_len,alt_len;
  while(    ks_getuntil(ks, '\n', &str, 0) >=0) {
    //    fprintf(stderr,"%s\n",str.s);
    if(str.s[0]!='#')  {
      char *ptr = kstrtok(str.s,"\t",&aux);//chrom
      ptr = kstrtok(NULL,NULL,&aux);//pos
      work1.l=0;
      kputsn(str.s,ptr-str.s-1, &work1);   
      buf[0] =  bcf_hdr_name2id(hdr_in, work1.s);
      assert(      buf[0]>=0);
      buf[1]=atoi(ptr)-1;
      ptr = kstrtok(NULL,NULL,&aux);//ID
      ptr = kstrtok(NULL,NULL,&aux);//REF

      ref_len=0;
      while(ptr[ref_len]!='\t') ref_len++;

      ptr = kstrtok(NULL,NULL,&aux);//ALT

      bool is_variant=false;
      alt_len=0;
      while(ptr[alt_len]!='\t') alt_len++;
      if(ptr[0]!='.') 
	is_variant=true;

      char * QUAL_ptr = kstrtok(NULL, NULL, &aux);
      assert (QUAL_ptr != NULL);
      
      for(int i=0;i<2;i++)  ptr = kstrtok(NULL,NULL,&aux);// gets us to INFO

      //find END if it is there
      char *end_ptr=strstr(ptr,"END=") ;
      if(end_ptr!=NULL) 
	buf[2]=atoi(end_ptr+4)-1;
      else
	buf[2]=buf[1]+alt_len-1;

      ptr  = kstrtok(NULL,NULL,&aux);//FORMAT
      //find index of DP (if present)
      //if not present, dont output anything (indels ignored)

      char *DP_ptr = find_format(ptr,"DP");
      int GQX = 0;
      int QUAL = 0;

      // AH: change code to use the minimum of GQ and QUAL fields if
      // GQX is not defined. See here:
      // https://support.basespace.illumina.com/knowledgebase/articles/144844-vcf-file
      // "GQXGenotype quality. GQX is the minimum of the GQ value
      // and the QUAL column. In general, these are similar values;
      // taking the minimum makes GQX the more conservative measure of
      // genotype quality."
      if(DP_ptr!=NULL) {
	buf[3]=atoi(DP_ptr);
	char *GQX_ptr = find_format(ptr,"GQX");
	if (GQX_ptr == NULL) 
	  {
	    GQX_ptr = find_format(ptr,"GQ");
	    GQX = atoi(GQX_ptr);
	    if (QUAL_ptr[0] != '.') 
	      {
		QUAL = atoi(QUAL_ptr);
		if (QUAL < GQX)
		  GQX = QUAL;
	      }
	  }
	else
	  {
	    GQX = atoi(GQX_ptr);
	  }
	
	//trying to reduce entropy on GQ to get better compression performance.
	//1. rounds down to nearest 10. 
	//2. sets gq to min(gq,100). 
	buf[4]=GQX/10;
	buf[4]*=10;
	if(buf[4]>100) buf[4]=100;

	//	printf("%d\t%d\t%d\t%d\t%d\n",buf[0],buf[1],buf[2],buf[3],buf[4]);
	if(gzwrite(depth_fp,buf,5*sizeof(int))!=(5*sizeof(int)))
	  die("ERROR: problem writing "+(string)out_fname+".tmp");
      }
      if(is_variant) {//wass this a variant? if so write it out to the bcf
	norm_args->ntotal++;
	vcf_parse(&str,hdr_in,bcf_rec);
	//	cerr<<bcf_rec->rid<<":"<<bcf_rec->pos<<endl;
	if(prev_rid!=bcf_rec->rid) 
	  vbuf.flush(variant_fp,hdr_out);
	else
	  vbuf.flush(bcf_rec->pos,variant_fp,hdr_out);
	prev_rid=bcf_rec->rid;
	int32_t pass = bcf_has_filter(hdr_in, bcf_rec, ".");
	bcf_update_format_int32(hdr_out,bcf_rec,"PF",&pass,1);
	bcf_update_filter(hdr_out,bcf_rec,NULL,0);
	if(bcf_rec->n_allele>2) {//split multi-allelics (using vcfnorm.c from bcftools1.3
	  norm_args->nsplit++;
	  split_multiallelic_to_biallelics(norm_args,bcf_rec );
	  for(int i=0;i<norm_args->ntmp_lines;i++){
	    remove_info(norm_args->tmp_lines[i]);
	    if(realign(norm_args,norm_args->tmp_lines[i]) != ERR_REF_MISMATCH)
	      ndec+=decompose(norm_args->tmp_lines[i],hdr_out,vbuf);
	    else
	      if(exit_on_mismatch)
		die("vcf did not match the reference");
	      else
		norm_args->nskipped++;
	  }
	}
	else {
	  remove_info(bcf_rec);
	  if( realign(norm_args,bcf_rec) !=  ERR_REF_MISMATCH)
	    ndec+=decompose(bcf_rec,hdr_out,vbuf);
	  else
	    if(exit_on_mismatch)
	      die("vcf did not match the reference");
	    else
	      norm_args->nskipped++;
	}
	vbuf.flush(bcf_rec->pos,variant_fp,hdr_out);
      }
    }
  }
  vbuf.flush(variant_fp,hdr_out);
  bcf_hdr_destroy(hdr_in);
  bcf_hdr_destroy(hdr_out);
  bcf_destroy1(bcf_rec);
  ks_destroy(ks);
  gzclose(fp);
  gzclose(depth_fp);  
  free(str.s);
  free(work1.s);
  hts_close(variant_fp);
  destroy_data(norm_args);
  fprintf(stderr,"Variant lines   total/split/realigned/skipped:\t%d/%d/%d/%d\n", norm_args->ntotal,norm_args->nsplit,norm_args->nchanged,norm_args->nskipped);
  fprintf(stderr,"Decomposed %d MNPs\n", ndec);


  fprintf(stderr,"Indexing %s\n",out_fname);
  bcf_index_build(out_fname, BCF_LIDX_SHIFT);
  free(out_fname);
  return 0;
}
Ejemplo n.º 5
0
int main_vcfindex(int argc, char *argv[])
{
    int c, min_shift = 14, force = 0;

    static struct option loptions[] = 
    {
        {"help",0,0,'h'},
        {"force",0,0,'f'},
        {"min-shift",1,0,'m'},
        {0,0,0,0}
    };

    while ((c = getopt_long(argc, argv, "h?fm:", loptions,NULL)) >= 0)
    {
        switch (c) 
        {
            case 'f': force = 1; break;
            case 'm': min_shift = atoi(optarg); break;
            default: usage();
        }
    }
    if ( optind==argc ) usage();
    if (min_shift < 0 || min_shift > 30)
    {
        fprintf(stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift);
        return 1;
    }

    char *fname = argv[optind];
    int ftype = hts_file_type(fname);
    if (!ftype)
    {
        fprintf(stderr, "[E::%s] unknown filetype; expected .vcf.gz or .bcf\n", __func__);
        return 1;
    }

    if (!force)
    {
        // Before complaining about existing index, check if the VCF file isn't newer.
        char *idx_fname = (char*)alloca(strlen(fname) + 5);
        strcat(strcpy(idx_fname, fname), min_shift <= 0 ? ".tbi" : ".csi");
        struct stat stat_tbi, stat_file;
        if ( stat(idx_fname, &stat_tbi)==0 )
        {
            stat(fname, &stat_file);
            if ( stat_file.st_mtime <= stat_tbi.st_mtime )
            {
                fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
                return 1;
            }
        }
    }

    if (ftype == FT_BCF_GZ)
    {
        if ( bcf_index_build(fname, min_shift) != 0 ) 
        {
            fprintf(stderr,"[E::%s] bcf_index_build failed: %s\n", __func__, fname);
            return 1;
        }        
    }
    else if (ftype == FT_VCF_GZ)
    {
        if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 )
        {
            fprintf(stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
            return 1;
        }
    }
    return 0;
}
Ejemplo n.º 6
0
int main_vcfindex(int argc, char *argv[])
{
    int c, force = 0, tbi = 0;
    int min_shift = BCF_LIDX_SHIFT;

    static struct option loptions[] =
    {
        {"csi",no_argument,NULL,'c'},
        {"tbi",no_argument,NULL,'t'},
        {"force",no_argument,NULL,'f'},
        {"min-shift",required_argument,NULL,'m'},
        {NULL, 0, NULL, 0}
    };

    while ((c = getopt_long(argc, argv, "ctfm:", loptions, NULL)) >= 0)
    {
        switch (c)
        {
            case 'c': tbi = 0; break;
            case 't': tbi = 1; min_shift = 0; break;
            case 'f': force = 1; break;
            case 'm': min_shift = atoi(optarg); break;
            default: usage();
        }
    }
    if ( optind==argc ) usage();
    if (tbi && min_shift>0)
    {
        fprintf(stderr, "[E::%s] min-shift option only expected for CSI indices \n", __func__);
        return 1;
    }
    if (min_shift < 0 || min_shift > 30)
    {
        fprintf(stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift);
        return 1;
    }

    char *fname = argv[optind];
    int ftype = hts_file_type(fname);
    if (!ftype || (ftype != FT_BCF_GZ && ftype != FT_VCF_GZ))
    {
        fprintf(stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
        if (!(ftype & FT_GZ))
            fprintf(stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
        return 1;
    }
    if (tbi && ftype == FT_BCF_GZ)
    {
        fprintf(stderr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
        tbi = 0; min_shift = BCF_LIDX_SHIFT;
    }
    if (min_shift == 0 && ftype == FT_BCF_GZ)
    {
        fprintf(stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
        return 1;
    }
    if (!tbi && ftype == FT_VCF_GZ && min_shift == 0)
    {
        fprintf(stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
        tbi = 1;
    }

    if (!force)
    {
        // Before complaining about existing index, check if the VCF file isn't newer.
        char *idx_fname = (char*)alloca(strlen(fname) + 5);
        strcat(strcpy(idx_fname, fname), tbi ? ".tbi" : ".csi");
        struct stat stat_tbi, stat_file;
        if ( stat(idx_fname, &stat_tbi)==0 )
        {
            stat(fname, &stat_file);
            if ( stat_file.st_mtime <= stat_tbi.st_mtime )
            {
                fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
                return 1;
            }
        }
    }

    if (ftype == FT_BCF_GZ)
    {
        if ( bcf_index_build(fname, min_shift) != 0 )
        {
            fprintf(stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
            return 1;
        }
    }
    else
    {
        if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 )
        {
            fprintf(stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
            return 1;
        }
    }
    return 0;
}
Ejemplo n.º 7
0
Archivo: tabix.c Proyecto: Illumina/akt
int main(int argc, char *argv[])
{
    int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0;
    tbx_conf_t conf = tbx_conf_gff;
    char *reheader = NULL;
    args_t args;
    memset(&args,0,sizeof(args_t));

    static const struct option loptions[] =
    {
        {"help", no_argument, NULL, 2},
        {"regions", required_argument, NULL, 'R'},
        {"targets", required_argument, NULL, 'T'},
        {"csi", no_argument, NULL, 'C'},
        {"zero-based", no_argument, NULL, '0'},
        {"print-header", no_argument, NULL, 'h'},
        {"only-header", no_argument, NULL, 'H'},
        {"begin", required_argument, NULL, 'b'},
        {"comment", required_argument, NULL, 'c'},
        {"end", required_argument, NULL, 'e'},
        {"force", no_argument, NULL, 'f'},
        {"preset", required_argument, NULL, 'p'},
        {"sequence", required_argument, NULL, 's'},
        {"skip-lines", required_argument, NULL, 'S'},
        {"list-chroms", no_argument, NULL, 'l'},
        {"reheader", required_argument, NULL, 'r'},
        {"version", no_argument, NULL, 1},
        {NULL, 0, NULL, 0}
    };

    char *tmp;
    while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0)
    {
        switch (c)
        {
            case 'R': args.regions_fname = optarg; break;
            case 'T': args.targets_fname = optarg; break;
            case 'C': do_csi = 1; break;
            case 'r': reheader = optarg; break;
            case 'h': args.print_header = 1; break;
            case 'H': args.print_header = 1; args.header_only = 1; break;
            case 'l': list_chroms = 1; break;
            case '0': conf.preset |= TBX_UCSC; detect = 0; break;
            case 'b':
                conf.bc = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -b %s\n", optarg);
                detect = 0;
                break;
            case 'e':
                conf.ec = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -e %s\n", optarg);
                detect = 0;
                break;
            case 'c': conf.meta_char = *optarg; detect = 0; break;
            case 'f': is_force = 1; break;
            case 'm':
                min_shift = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -m %s\n", optarg);
                break;
            case 'p':
                detect = 0;
                if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff;
                else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed;
                else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam;
                else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf;
                else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed
                else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf
                else error("The preset string not recognised: '%s'\n", optarg);
                break;
            case 's':
                conf.sc = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -s %s\n", optarg);
                detect = 0;
                break;
            case 'S':
                conf.line_skip = strtol(optarg,&tmp,10);
                if ( *tmp ) error("Could not parse argument: -S %s\n", optarg);
                detect = 0;
                break;
            case 1:
                printf(
"tabix (htslib) %s\n"
"Copyright (C) 2017 Genome Research Ltd.\n", hts_version());
                return EXIT_SUCCESS;
            default: return usage();
        }
    }

    if ( optind==argc ) return usage();

    if ( list_chroms )
        return query_chroms(argv[optind]);

    if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname )
    {
        int nregs = 0;
        char **regs = NULL;
        if ( !args.header_only )
            regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs);
        return query_regions(&args, argv[optind], regs, nregs);
    }

    char *fname = argv[optind];
    int ftype = file_type(fname);
    if ( detect )  // no preset given
    {
        if ( ftype==IS_GFF ) conf = tbx_conf_gff;
        else if ( ftype==IS_BED ) conf = tbx_conf_bed;
        else if ( ftype==IS_SAM ) conf = tbx_conf_sam;
        else if ( ftype==IS_VCF )
        {
            conf = tbx_conf_vcf;
            if ( !min_shift && do_csi ) min_shift = 14;
        }
        else if ( ftype==IS_BCF )
        {
            if ( !min_shift ) min_shift = 14;
        }
        else if ( ftype==IS_BAM )
        {
            if ( !min_shift ) min_shift = 14;
        }
    }
    if ( do_csi )
    {
        if ( !min_shift ) min_shift = 14;
        min_shift *= do_csi;  // positive for CSIv2, negative for CSIv1
    }
    if ( min_shift!=0 && !do_csi ) do_csi = 1;

    if ( reheader )
        return reheader_file(fname, reheader, ftype, &conf);

    char *suffix = ".tbi";
    if ( do_csi ) suffix = ".csi";
    else if ( ftype==IS_BAM ) suffix = ".bai";
    else if ( ftype==IS_CRAM ) suffix = ".crai";

    char *idx_fname = calloc(strlen(fname) + 5, 1);
    strcat(strcpy(idx_fname, fname), suffix);

    struct stat stat_tbi, stat_file;
    if ( !is_force && stat(idx_fname, &stat_tbi)==0 )
    {
        // Before complaining about existing index, check if the VCF file isn't
        // newer. This is a common source of errors, people tend not to notice
        // that tabix failed
        stat(fname, &stat_file);
        if ( stat_file.st_mtime <= stat_tbi.st_mtime )
            error("[tabix] the index file exists. Please use '-f' to overwrite.\n");
    }
    free(idx_fname);

    if ( ftype==IS_CRAM )
    {
        if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
        return 0;
    }
    else if ( do_csi )
    {
        if ( ftype==IS_BCF )
        {
            if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname);
            return 0;
        }
        if ( ftype==IS_BAM )
        {
            if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
            return 0;
        }
        if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname);
        return 0;
    }
    else    // TBI index
    {
        if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname);
        return 0;
    }
    return 0;
}