Esempio n. 1
0
File: faidx.c Progetto: pkrusche/vt
/*
 * Build the FASTA index "<fn>.fai" for the FASTA file fn.
 *
 * The FASTA file is opened through BGZF. When the handle reports itself as
 * compressed, a block index is collected while scanning and dumped with the
 * ".gzi" suffix.
 *
 * Returns 0 on success and -1 when memory cannot be allocated, the FASTA
 * file cannot be opened, the index cannot be built, or the .fai file cannot
 * be written.
 */
int fai_build(const char *fn)
{
	char *str;
	BGZF *bgzf;
	FILE *fp;
	faidx_t *fai;
	/* +5 leaves room for ".fai" and the terminating NUL */
	str = (char*)calloc(strlen(fn) + 5, 1);
	if ( !str ) {
		fprintf(stderr, "[fai_build] out of memory\n");
		return -1;
	}
	sprintf(str, "%s.fai", fn);
	bgzf = bgzf_open(fn, "r");
	if ( !bgzf ) {
		fprintf(stderr, "[fai_build] fail to open the FASTA file %s\n",fn);
		free(str);
		return -1;
	}
	if ( bgzf->is_compressed ) bgzf_index_build_init(bgzf);
	fai = fai_build_core(bgzf);
	if ( !fai ) {
		/* Nothing to save: bail out instead of handing a null index to fai_save(). */
		fprintf(stderr, "[fai_build] fail to build the index of FASTA file %s\n",fn);
		bgzf_close(bgzf);
		free(str);
		return -1;
	}
	if ( bgzf->is_compressed ) bgzf_index_dump(bgzf, fn, ".gzi");
	bgzf_close(bgzf);
	fp = fopen(str, "wb");
	if ( !fp ) {
		fprintf(stderr, "[fai_build] fail to write FASTA index %s\n",str);
		fai_destroy(fai); free(str);
		return -1;
	}
	fai_save(fai, fp);
	fclose(fp);
	free(str);
	fai_destroy(fai);
	return 0;
}
// Stores the BCF and index file names, opens the BCF file with bgzf_open in
// "rb" mode and starts with no index data loaded. The result of bgzf_open is
// stored as-is; it is not checked here.
SingleChromosomeBCFIndex::SingleChromosomeBCFIndex(
    const std::string& bcfFile, const std::string& indexFile) {
  this->data_ = NULL;
  this->indexFile_ = indexFile;
  this->bcfFile_ = bcfFile;
  this->fBcfFile_ = bgzf_open(this->bcfFile_.c_str(), "rb");
}
Esempio n. 3
0
/*
 * "samtools reheader <in.header.sam> <in.bam>"
 *
 * Reads a header from argv[1], opens the BAM named by argv[2] ("-" selects
 * stdin) and hands both to bam_reheader() together with the stdout file
 * descriptor.
 *
 * Returns 0 after bam_reheader() has run, 1 on a usage error or when either
 * input cannot be opened.
 */
int main_reheader(int argc, char *argv[])
{
    bam_header_t *new_header;
    BGZF *bam_in;
    tamFile header_fp;

    if (argc != 3) {
        fprintf(stderr, "Usage: samtools reheader <in.header.sam> <in.bam>\n");
        return 1;
    }

    /* load the replacement header */
    header_fp = sam_open(argv[1]);
    if (header_fp == 0) {
        fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]);
        return 1;
    }
    new_header = sam_header_read(header_fp);
    sam_close(header_fp);

    /* "-" means the BAM comes from standard input */
    if (strcmp(argv[2], "-") == 0)
        bam_in = bgzf_fdopen(fileno(stdin), "r");
    else
        bam_in = bgzf_open(argv[2], "r");
    if (bam_in == 0) {
        fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]);
        return 1;
    }

    bam_reheader(bam_in, new_header, fileno(stdout));
    bgzf_close(bam_in);
    return 0;
}
Esempio n. 4
0
// Dump the per-chromosome records stored in "<FILE><BIN>" to stdout.
//
// argv[0] is the file prefix; the only option is "-r chrName", which looks
// the chromosome up in the index read from "<FILE><IDX>", seeks to its
// recorded offset and prints that single record. Without -r every record is
// printed until getPerChr() yields one with nSites==0.
//
// Usage and argument errors are reported on stderr and terminate the process
// with exit(0), following the convention already used in this function.
// Returns 0 when printing completes.
int print(int argc, char**argv){
  if(argc==0){
    fprintf(stderr,"print FILE [-r chrName]\n");
    exit(0);
  }
  char *base = *argv;
  char* outnames_bin = append(base,BIN);
  char* outnames_idx = append(base,IDX);
  fprintf(stderr,"Assuming binfile:%s and indexfile:%s\n",outnames_bin,outnames_idx);
  
  myMap mm = getMap(outnames_idx);
  writemap(stderr,mm);
  BGZF *fp = bgzf_open(outnames_bin,"r");
  if(fp==NULL){
    // Without this check the seek/read below would run on a null handle.
    fprintf(stderr,"Problem opening file: %s\n",outnames_bin);
    exit(0);
  }

  --argc;++argv;
  int argP =0;
  char *chr=NULL;

  // Options come in "-flag value" pairs.
  while(argP<argc){
    if(strcmp("-r",argv[argP])!=0){
      fprintf(stderr,"Unknown argument:%s\n",argv[argP]);
      exit(0);
    }
    // The flag must be followed by its value; previously this test could
    // never fire and a trailing "-r" read one element past the options.
    if(argP+1>=argc){
      fprintf(stderr,"incomplete arguments list\n");
      exit(0);
    }
    chr = argv[argP+1];
    argP +=2;
  }
  
  
  if(chr!=NULL){  
    myMap::iterator it = mm.find(chr);
    if(it==mm.end()){
      fprintf(stderr,"Problem finding chr: %s in index\n",chr);
      exit(0);
    }
    datum d = it->second;
    bgzf_seek(fp,d.fpos,SEEK_SET);
  }

  while(1){
    perChr pc = getPerChr(fp);
    if(pc.nSites==0)
      break;
    fprintf(stderr,"pc.chr=%s pc.nSites=%zu firstpos=%d lastpos=%d\n",pc.chr,pc.nSites,pc.posi[0],pc.posi[pc.nSites-1]);
    print_main(pc,stdout);
    // Release the record before deciding whether to stop, so the single
    // chromosome case does not skip the cleanup.
    dalloc(pc);
    if(chr!=NULL)
      break;
  }

  return 0;
}
Esempio n. 5
0
// Opens fn for writing through bgzf_open ("w" mode) and keeps the handle in
// this->fp. A request to append is only reported on stderr; the file is still
// opened in "w" mode. Returns 0 on success, -1 when the file cannot be opened.
int BGZipFileWriter::open(const char* fn, bool append){
    if (append) {
        fprintf(stderr, "Gzip does not support appending.\n");
    }

    this->fp = bgzf_open(fn, "w");
    if (this->fp) {
        return 0;
    }

    fprintf(stderr, "ERROR: Cannot open %s for write\n", fn);
    return -1;
}
Esempio n. 6
0
// Sorts the records of one BAM bin and writes them to "<dirBAMsort>/b<iBin>".
//
// iBin       bin number; selects the per-thread input files and the output name
// binN       number of records expected in the bin
// binS       total number of bytes expected in the bin
// nThreads   number of per-thread directories "<dirBAMsort><thread>/" to read from
// dirBAMsort directory prefix for the input pieces and the output file
// P          parameters: compression level, header, chromosome names/lengths, log
//
// Each per-thread piece is read into one buffer and then deleted. Every record
// is laid out as a uint32 size, the record itself, and a trailing uint that is
// used as the second sort value. Records are ordered with funCompareUint2 and
// written out one by one after the header.
void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters *P) {

    //an empty bin produces no output file
    if (binS==0) return;

    //raw bytes of the bin, and three uint values per record for sorting
    char *binBuf=new char[binS];
    uint *sortKeys=new uint[binN*3];

    //concatenate the pieces written by each thread, deleting each one after loading
    uint bytesLoaded=0;
    uint iThread=0;
    while (iThread<nThreads) {
        string pieceName=dirBAMsort+to_string(iThread)+"/"+to_string((uint) iBin);
        ifstream piece (pieceName.c_str());
        piece.read(binBuf+bytesLoaded,binS);//read the whole file
        bytesLoaded += piece.gcount();
        piece.close();
        remove(pieceName.c_str());
        iThread++;
    }

    if (bytesLoaded!=binS) {
        ostringstream errOut;
        errOut << "EXITING because of FATAL ERROR: number of bytes expected from the BAM bin does not agree with the actual size on disk: ";
        errOut << binS <<"   "<< bytesLoaded <<"   "<< iBin <<"\n";
        exitWithError(errOut.str(),std::cerr, P->inOut->logMain, 1, *P);
    }

    //walk the buffer record by record and fill one (key, read order, offset) triple each
    uint offset=0;
    for (uint iRec=0; iRec<binN; iRec++) {
        uint *triple=sortKeys+iRec*3;
        uint32 *rec32=(uint32*) (binBuf+offset);
        triple[0]=( ((uint) rec32[1]) << 32) | ( (uint)rec32[2] );
        triple[2]=offset;
        offset+=rec32[0]+sizeof(uint32);//the stored record size does not count the size field itself
        triple[1]=*( (uint*) (binBuf+offset) ); //read order
        offset+=sizeof(uint);
    }

    //order the triples
    qsort((void*) sortKeys, binN, sizeof(uint)*3, funCompareUint2);

    //open the sorted output for this bin and start it with the header
    string outName=dirBAMsort+"/b"+to_string((uint) iBin);
    string outMode="w"+to_string((long long) P->outBAMcompression);
    BGZF *sortedOut=bgzf_open(outName.c_str(),outMode.c_str());
    outBAMwriteHeader(sortedOut,P->samHeaderSortedCoord,P->chrName,P->chrLength);

    //emit the records in sorted order: size field plus record body
    for (uint iRec=0; iRec<binN; iRec++) {
        char *recStart=binBuf+sortKeys[iRec*3+2];
        bgzf_write(sortedOut,recStart, *((uint32*) recStart)+sizeof(uint32) );
    }

    bgzf_flush(sortedOut);
    bgzf_close(sortedOut);

    //release memory
    delete [] binBuf;
    delete [] sortKeys;
}
Esempio n. 7
0
/*
 * Load the FASTA index "<fn>.fai" and open the FASTA file fn itself.
 *
 * When built with _USE_KNETFILE and fn starts with "ftp://" or "http://",
 * the index is fetched with download_and_open(). Otherwise the local index
 * is opened; if it is missing, fai_build() is called and the open retried.
 * For a compressed FASTA the ".gzi" block index is loaded as well.
 *
 * Returns the loaded index, or 0/NULL on any failure. Everything acquired
 * up to the point of failure is released before returning.
 */
faidx_t *fai_load(const char *fn)
{
    char *str;
    FILE *fp;
    faidx_t *fai;
    /* +5 leaves room for ".fai" and the terminating NUL */
    str = (char*)calloc(strlen(fn) + 5, 1);
    if (str == 0) {
        fprintf(stderr, "[fai_load] out of memory.\n");
        return 0;
    }
    sprintf(str, "%s.fai", fn);

#ifdef _USE_KNETFILE
    if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)
    {
        fp = download_and_open(str);
        if ( !fp )
        {
            fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str);
            free(str);
            return 0;
        }
    }
    else
#endif
        fp = fopen(str, "rb");
    if (fp == 0) {
        /* No index yet: try to create it, then open it again. */
        fprintf(stderr, "[fai_load] build FASTA index.\n");
        fai_build(fn);
        fp = fopen(str, "rb");
        if (fp == 0) {
            fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
            free(str);
            return 0;
        }
    }

    fai = fai_read(fp);
    fclose(fp);
    free(str);
    if (fai == 0) {
        fprintf(stderr, "[fai_load] fail to read FASTA index.\n");
        return 0;
    }

    fai->bgzf = bgzf_open(fn, "rb");
    if (fai->bgzf == 0) {
        fprintf(stderr, "[fai_load] fail to open FASTA file.\n");
        /* The parsed index was previously leaked on this path.
         * NOTE(review): relies on fai_destroy() accepting an index whose
         * bgzf handle is null - confirm against its definition. */
        fai_destroy(fai);
        return 0;
    }
    if ( fai->bgzf->is_compressed==1 )
    {
        if ( bgzf_index_load(fai->bgzf, fn, ".gzi") < 0 )
        {
            fprintf(stderr, "[fai_load] failed to load .gzi index: %s[.gzi]\n", fn);
            fai_destroy(fai);
            return NULL;
        }
    }
    return fai;
}
Esempio n. 8
0
/*
 * Open an existing index for fastq_path and fill in *index.
 *
 * Opens the FASTQ through BGZF, loads the hash from "<index_prefix>.hsh",
 * and maps "<index_prefix>.lup" read-only into index->table.
 *
 * Returns IFQ_OK on success, otherwise the code of the first step that
 * failed. Members of *index that were set before a failure are left as they
 * are; NOTE(review): presumably the caller releases them - confirm.
 */
ifq_codes_t
ifq_open_index(char *fastq_path, char *index_prefix, ifq_index_t *index)
{
    char *hash_path = concatenate( index_prefix, ".hsh" );
    char *lookup_path = concatenate( index_prefix, ".lup" );
    struct stat sb;

    ifq_codes_t ret = IFQ_OK;

    index->fastq_file = bgzf_open( fastq_path , "r" );
    if( index->fastq_file == NULL )
    {
        ret = IFQ_BAD_FASTQ;
        goto index_error;
    }
    
    index->hash_file = fopen( hash_path , "r" );
    if( index->hash_file == NULL )
    {
        ret = IFQ_BAD_PREFIX;
        goto index_error;
    }

    index->hash = cmph_load( index->hash_file );
    if( index->hash == NULL )
    {
        ret = IFQ_BAD_HASH;
        goto index_error;
    }

    index->lookup_fd = open( lookup_path, O_RDWR );
    if( index->lookup_fd == -1 )
    {
        ret = IFQ_BAD_PREFIX;
        goto index_error;
    }
    
    /* The mapping length comes from the file size, so a failed fstat must
     * not be allowed to feed an indeterminate size into mmap. */
    if( fstat( index->lookup_fd, &sb ) == -1 )
    {
        ret = IFQ_BAD_INDEX;
        goto index_error;
    }
    index->lookup_size = sb.st_size;

    index->table = (uint64_t *) mmap( NULL, index->lookup_size, PROT_READ, MAP_FILE | MAP_SHARED, index->lookup_fd, 0 );
    if( index->table == MAP_FAILED )
    {
        ret = IFQ_BAD_INDEX;
        goto index_error;
    }

    /* Success falls through to the same exit: only the path strings are
     * owned by this function. */
index_error: 
    free( hash_path );
    free( lookup_path );

    return ret;
}
Esempio n. 9
0
// Validate a binary dump against the text file it was made from.
//
// argv[0] names the gz text file; the binary file is "<argv[0]><BIN>". The
// first text line is read and discarded. After that, for every record
// returned by getPerChr() and every site in it, the next text line must carry
// the same chromosome name and position, and its five following numeric
// columns are handed to fun() together with the stored tW/tP/tF/tH/tL values.
//
// Any problem is reported on stderr and terminates the process with exit(0),
// following the convention already used in this function. Returns 0 when
// every record has been checked.
int val_bed(int argc, char**argv){
  if(argc!=1){
    fprintf(stderr,"val_bed FILE.gz \n");
    exit(0);
  }
  char *base = *argv;
  char* outnames_bin = append(base,BIN);
  char* outnames_gz = base;
  fprintf(stderr,"Assuming binfile:%s and gzfile:%s\n",outnames_bin,outnames_gz);
  
  
  BGZF *fp = bgzf_open(outnames_bin,"r");
  if(fp==NULL){
    fprintf(stderr,"Problem opening file: %s\n",outnames_bin);
    exit(0);
  }
  gzFile gz =gzopen(outnames_gz,"r");
  if(gz==NULL){
    fprintf(stderr,"Problem opening file: %s\n",outnames_gz);
    exit(0);
  }
  char buf[4096];
  gzgets(gz,buf,4096);// first line is not compared against anything
  while(1){
    perChr pc = getPerChr(fp);
    if(pc.nSites==0)
      break;
    fprintf(stderr,"pc.chr=%s pc.nSites=%zu firstpos=%d lastpos=%d\n",pc.chr,pc.nSites,pc.posi[0],pc.posi[pc.nSites-1]);
    for(size_t i=0;i<pc.nSites;i++){
      // The text file must supply one line per stored site.
      if(gzgets(gz,buf,4096)==NULL){
	fprintf(stderr,"Problem reading line from file: %s\n",outnames_gz);
	exit(0);
      }
      // Split into chr, position and five values. Once strtok runs out of
      // fields every later call also yields NULL, so testing the last token
      // is enough to know all seven are present.
      char *tok[7];
      tok[0] = strtok(buf,"\n\t ");
      for(int k=1;k<7;k++)
	tok[k] = strtok(NULL,"\t\n ");
      if(tok[6]==NULL){
	fprintf(stderr,"Problem with incomplete line in file: %s\n",outnames_gz);
	exit(0);
      }
      char *chr = tok[0];
      if(strcmp(chr,pc.chr)!=0){
	fprintf(stderr,"Problem with nonmatching chromosome: \'%s\' vs \'%s\'\n",chr,pc.chr);
	exit(0);
      }
      int posi =atoi(tok[1]);
      if(posi!=pc.posi[i]){
	fprintf(stderr,"Problem with nonmatching position\n");
	exit(0);
      }
      float tW = atof(tok[2]);
      float tP = atof(tok[3]);
      float tF = atof(tok[4]);
      float tH = atof(tok[5]);
      float tL = atof(tok[6]);
      fun(tW,pc.tW[i]);
      fun(tP,pc.tP[i]);
      fun(tF,pc.tF[i]);
      fun(tH,pc.tH[i]);
      fun(tL,pc.tL[i]);
    }
    fprintf(stderr,"FILE: %s chr: %s OK\n",base,pc.chr); 
    dalloc(pc);
  }

  fprintf(stderr,"ALL OK: %s\n",base);
  return 0;
}
Esempio n. 10
0
/*
 * Stress test: 10000 times, open argv[1] through BGZF, attach worker threads
 * with bgzf_mt(fp, 2, 256) and close the handle again without reading.
 *
 * Exits with status 1 on a usage error or when the file cannot be opened,
 * and aborts when bgzf_close() reports a failure.
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "Usage: thrash_threads1 input.bam\n");
        exit(1);
    }

    int i;
    for (i = 0; i < 10000; i++) {
        printf("i=%d\n", i);
        BGZF *fpin  = bgzf_open(argv[1], "r");
        if (!fpin) {
            /* A missing or unreadable input would otherwise reach bgzf_mt()
             * as a null handle. */
            fprintf(stderr, "Failed to open %s\n", argv[1]);
            exit(1);
        }
        bgzf_mt(fpin, 2, 256);
        if (bgzf_close(fpin) < 0) abort();
    }
    return 0;
}
Esempio n. 11
0
/*
 * "bgt getalt <bgt-base>"
 *
 * Opens "<bgt-base>.bcf" and prints one line per record in the form
 * "contig:pos:len:alt", where the prefix shared by the REF and ALT strings
 * has been trimmed: pos is advanced and len shortened by the number of
 * leading characters the two have in common.
 *
 * Returns 0 on success, 1 on a usage error or when the file or its header
 * cannot be read.
 */
int main_getalt(int argc, char *argv[])
{
	int c;
	char *fn;
	BGZF *fp;
	bcf1_t *b;
	bcf_hdr_t *h;
	kstring_t s = {0,0,0};

	/* no options are defined; getopt is run only to position optind */
	while ((c = getopt(argc, argv, "")) >= 0) {
	}
	if (argc - optind == 0) {
		fprintf(stderr, "Usage: bgt getalt <bgt-base>\n");
		return 1;
	}

	/* +5 leaves room for ".bcf" and the terminating NUL */
	fn = (char*)calloc(strlen(argv[optind]) + 5, 1);
	if (fn == 0) {
		fprintf(stderr, "[main_getalt] out of memory\n");
		return 1;
	}
	sprintf(fn, "%s.bcf", argv[optind]);
	fp = bgzf_open(fn, "r");
	if (fp == 0) {
		/* An explicit check instead of assert(): a missing input file is a
		 * user error and must also be caught in NDEBUG builds. */
		fprintf(stderr, "[main_getalt] failed to open %s\n", fn);
		free(fn);
		return 1;
	}
	free(fn);

	h = bcf_hdr_read(fp);
	if (h == 0) {
		fprintf(stderr, "[main_getalt] failed to read the BCF header\n");
		bgzf_close(fp);
		return 1;
	}
	b = bcf_init1();
	while (bcf_read1(fp, b) >= 0) {
		char *ref, *alt;
		int l_ref, l_alt, i, min_l;
		bcf_get_ref_alt1(b, &l_ref, &ref, &l_alt, &alt);
		min_l = l_ref < l_alt? l_ref : l_alt;
		/* i = length of the common prefix of ref and alt */
		for (i = 0; i < min_l && ref[i] == alt[i]; ++i);
		s.l = 0;
		kputs(h->id[BCF_DT_CTG][b->rid].key, &s);
		kputc(':', &s); kputw(b->pos + 1 + i, &s);
		kputc(':', &s); kputw(b->rlen - i, &s);
		kputc(':', &s); kputsn(alt + i, l_alt - i, &s);
		puts(s.s);
	}
	bcf_destroy1(b);
	bcf_hdr_destroy(h);

	bgzf_close(fp);
	free(s.s);
	return 0;
}
Esempio n. 12
0
// Opens a BGZF file, or a standard stream when the name is "-".
//
// A filename of "-" selects stdout for write modes ('w'/'W') and stdin for
// read modes ('r'/'R'); every other combination is opened by name. When a
// file opened for reading must carry the BGZF EOF block (ourRequireEofBlock)
// and does not, a message is written to std::cerr and the file is closed
// again. Otherwise the current position is remembered in myStartPos.
BgzfFileType::BgzfFileType(const char * filename, const char * mode)
{
    const bool forWrite = (mode[0] == 'w') || (mode[0] == 'W');
    const bool forRead = (mode[0] == 'r') || (mode[0] == 'R');
    // The name is only inspected for read and write modes.
    const bool useStdStream =
        (forWrite || forRead) && (strcmp(filename, "-") == 0);

    if (!useStdStream)
    {
        bgzfHandle = bgzf_open(filename, mode);
    }
    else if (forWrite)
    {
        // "-" in a write mode: send the output to stdout.
        bgzfHandle = bgzf_fdopen(fileno(stdout), mode);
    }
    else
    {
        // "-" in a read mode: take the input from stdin.
        bgzfHandle = bgzf_fdopen(fileno(stdin), mode);
    }

    myStartPos = 0;
    if (bgzfHandle != NULL)
    {
        // The EOF block is only looked for on files opened for reading, and
        // only when it has been declared mandatory.
        const bool eofBlockMissing = forRead && ourRequireEofBlock &&
            (bgzf_check_EOF(bgzfHandle) == 0);
        if (!eofBlockMissing)
        {
            // Properly formatted file: remember where its data starts.
            myStartPos = bgzf_tell(bgzfHandle);
        }
        else
        {
            std::cerr << "BGZF EOF marker is missing in " << filename << std::endl;
            // The required block is absent, so the file is not kept open.
            close();
        }
    }

    myEOF = false;
}
Esempio n. 13
0
File: bgzf.c Progetto: geneva/POPBAM
/*
 * Check whether the file fn starts with the 10 byte magic sequence below.
 *
 * The first 10 bytes are read straight from the handle's underlying file.
 * Returns 1 when they equal the magic sequence, 0 when they differ, and -1
 * when the file cannot be opened or holds fewer than 10 bytes.
 */
int bgzf_check_bgzf(const char *fn)
{
    unsigned char expected[]="\037\213\010\4\0\0\0\0\0\377";
    unsigned char head[10];
    BGZF *handle;
    int n_read;

    handle = bgzf_open(fn, "r");
    if (handle == 0)
    {
        fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
        return -1;
    }

    /* grab the leading bytes, then the handle is no longer needed */
    n_read = fread(head, 1, 10, handle->file);
    bgzf_close(handle);

    if (n_read != 10)
        return -1;

    return memcmp(expected, head, 10) == 0 ? 1 : 0;
}
Esempio n. 14
0
// Compute statistics for the records in "<FILE><BIN>" and write them as a
// tab separated table to "<outnames><RES>".
//
// argv[0] is the file prefix. Options, each followed by a value:
//   -r chrName   restrict to one chromosome (looked up in "<FILE><IDX>")
//   -outnames S  output prefix (defaults to the input prefix)
//   -win N, -step N   window and step size; if either is 0 both are set to 0
//   -nChr N      must be non-zero
//   -type N      passed through to do_stat_main()
//
// Problems are reported on stderr and terminate the process with exit(0),
// following the convention already used in this function. Returns 0 when the
// result file has been written.
int do_stat(int argc, char**argv){
  if(argc==0){
    fprintf(stderr,"do_stat FILE -win -step -nChr [-r chrName -type [0,1,2]]\n");
    exit(0);
  }
  char *base = *argv;
  char* outnames_bin = append(base,BIN);
  char* outnames_idx = append(base,IDX);
  fprintf(stderr,"\tAssuming binfile:%s and indexfile:%s\n",outnames_bin,outnames_idx);
  
  myMap mm = getMap(outnames_idx);
  writemap(stderr,mm);
  BGZF *fp = bgzf_open(outnames_bin,"r");
  if(fp==NULL){
    // Without this check the seek/read below would run on a null handle.
    fprintf(stderr,"\tProblem opening file: %s\n",outnames_bin);
    exit(0);
  }

  --argc;++argv;
  int argP =0;
  char *chr=NULL;
  char *outnames = NULL;
  int nChr =0;
  int win =0;
  int step =0;
  int type =0;
  // Options come in "-flag value" pairs.
  while(argP<argc){
    // Work out where the value of this flag has to be stored.
    char **strDst = NULL;
    int *intDst = NULL;
    if(strcmp("-r",argv[argP])==0)
      strDst = &chr;
    else if(strcmp("-outnames",argv[argP])==0)
      strDst = &outnames;
    else if(strcmp("-step",argv[argP])==0)
      intDst = &step;
    else if(strcmp("-win",argv[argP])==0)
      intDst = &win;
    else if(strcmp("-nChr",argv[argP])==0)
      intDst = &nChr;
    else if(strcmp("-type",argv[argP])==0)
      intDst = &type;
    else {
      fprintf(stderr,"Unknown argument:%s\n",argv[argP]);
      exit(0);
    }
    // The flag must be followed by its value; previously this test could
    // never fire and a trailing flag read one element past the options.
    if(argP+1>=argc){
      fprintf(stderr,"incomplete arguments list\n");
      exit(0);
    }
    if(strDst!=NULL)
      *strDst = argv[argP+1];
    else
      *intDst = atoi(argv[argP+1]);
    argP +=2;
  }

  // -r and -outnames are optional; never hand a null pointer to %s.
  fprintf(stderr,"\t -r=%s outnames=%s step: %d win: %d nChr:%d\n",chr?chr:"(null)",outnames?outnames:"(null)",step,win,nChr);
  if(nChr==0){
    fprintf(stderr,"nChr must be different from zero\n");
    exit(0);
  }
  if(win==0||step==0){
    fprintf(stderr,"\tWinsize equals zero or step size equals zero. Will use entire chromosome as window\n");
    win=step=0;
  }  
  
  if(chr!=NULL){  
    myMap::iterator it = mm.find(chr);
    if(it==mm.end()){
      fprintf(stderr,"\tProblem finding chr: %s in index\n",chr);
      exit(0);
    }
    datum d = it->second;
    bgzf_seek(fp,d.fpos,SEEK_SET);
  }
  if(outnames==NULL)
    outnames = base;

  char *resname = append(outnames,RES);
  FILE *fpres = fopen(resname,"w");
  if(fpres==NULL){
    fprintf(stderr,"\tProblem opening file: %s\n",resname);
    exit(0);
  }
  //fprintf(fpres,"## thetaStat VERSION: %s build:(%s,%s)\n",VERSION,__DATE__,__TIME__);
  fprintf(fpres,"#(indexStart,indexStop)(firstPos_withData,lastPos_withData)(WinStart,WinStop)\t");
  fprintf(fpres,"Chr\tWinCenter\t");
  fprintf(fpres,"tW\ttP\ttF\ttH\ttL\t");
  fprintf(fpres,"Tajima\tfuf\tfud\tfayh\tzeng\tnSites\n");
  while(1){
    perChr pc = getPerChr(fp);
    if(pc.nSites==0)
      break;
    fprintf(stderr,"\tpc.chr=%s pc.nSites=%zu firstpos=%d lastpos=%d\n",pc.chr,pc.nSites,pc.posi[0],pc.posi[pc.nSites-1]);
    kstring_t str = do_stat_main(pc,step,win,nChr,type);
    fwrite(str.s,1,str.l,fpres);
    // NOTE(review): assumes the returned buffer is heap allocated and owned
    // by the caller, as the original "should clean up str" remark suggests.
    free(str.s);
    fflush(fpres);
    // Release the record before deciding whether to stop, so the single
    // chromosome case does not skip the cleanup.
    dalloc(pc);
    if(chr!=NULL)
      break;
  }
  fclose(fpres);
  fprintf(stderr,"\tDumping file: \"%s\"\n",resname);
  return 0;
}
Esempio n. 15
0
/*
 * Create the index files "<index_prefix>.hsh" and "<index_prefix>.lup" for
 * the FASTQ file fastq_path.
 *
 * A hash function is built over the FASTQ through the cmph adapter, the
 * lookup file is produced by create_index() after rewinding the FASTQ, and
 * the hash is dumped into the .hsh file.
 *
 * Returns IFQ_OK on success, otherwise the code of the first step that
 * failed.
 *
 * Every resource starts out as NULL and the single exit path releases only
 * what was actually acquired. The previous chain of labels ran the cmph
 * teardown with uninitialised pointers when the FASTQ could not be opened,
 * and left files, the adapter or the path strings behind on other failures.
 */
ifq_codes_t ifq_create_index(char *fastq_path, char *index_prefix)
{
    char *hash_path = concatenate( index_prefix, ".hsh" );
    char *seek_path = concatenate( index_prefix, ".lup" );
    ifq_codes_t ret = IFQ_OK;
    BGZF *fastq_file = NULL;
    FILE *hash_file = NULL;
    cmph_io_adapter_t *source = NULL;
    cmph_config_t *config = NULL;
    cmph_t *hash = NULL;
    
    /* Open input and output files */
    fastq_file = bgzf_open( fastq_path, "r" );
    if( fastq_file == NULL )
    {
        ret = IFQ_BAD_FASTQ;
        goto cleanup;
    }
    
    hash_file = fopen( hash_path, "w" );
    if( hash_file == NULL )
    {
        ret = IFQ_BAD_PREFIX;
        goto cleanup;
    }

    /* Create hash function */
    source = cmph_io_fastq_adapter( fastq_file );
    if( source == NULL )
    {
        ret = IFQ_BAD_HASH;
        goto cleanup;
    }

    config = cmph_config_new( source );
    cmph_config_set_algo( config, CMPH_CHD );
    cmph_config_set_mphf_fd( config, hash_file );
    hash = cmph_new( config );
    if( hash == NULL )
    {
        ret = IFQ_BAD_HASH;
        goto cleanup;
    }

    /* Create the file index using the hash */
    bgzf_seek( fastq_file, 0, SEEK_SET );
    if( create_index( fastq_file, hash, seek_path ) != 1 )
    {
        ret = IFQ_BAD_INDEX;
    }

cleanup:
    /* Same teardown order as before: configuration, dump and destroy the
     * hash, adapter, hash file, FASTQ. As before, the hash is dumped
     * whenever it was built, including when create_index() failed. */
    if( config != NULL )
        cmph_config_destroy( config );
    if( hash != NULL )
    {
        cmph_dump( hash, hash_file );
        cmph_destroy( hash );
    }
    free( source );
    if( hash_file != NULL )
        fclose( hash_file );
    if( fastq_file != NULL )
        bgzf_close( fastq_file );
    free( hash_path );
    free( seek_path );

    return ret;
}
Esempio n. 16
0
void make_bed(int argc,char **argv){
  //  fprintf(stderr,"[%s] \n",__FUNCTION__);
  if(argc==0){
    fprintf(stderr,"make_bed FILE.theta.gz [OUTNAMES] (if OUTNAMES is supplied, this will be used as prefix \n");
    exit(0);
  }
  
  if(!fexists(argv[0])){
    fprintf(stderr,"Problem opening file: %s\n",argv[0]);
    exit(0);
  }
  char *base = argv[0];
  if(argc==2)
    base = argv[1];

  char* outnames_bin = append(base,BIN);
  char* outnames_idx = append(base,IDX);
    
  const char *delims = "\t \n";
  gzFile gfp = gzopen(argv[0],"r");
  char *buf = new char[LENS];
  BGZF *cfpD = bgzf_open(outnames_bin,"w9");
  FILE *fp =fopen(outnames_idx,"w");
  
  std::vector<the_t> vec;
  char *lastChr = NULL;
  
  
  while(gzgets(gfp,buf,LENS)){
    char *chr = strtok(buf,delims);
    if(chr[0]=='#')
      continue;
    int posi=atoi(strtok(NULL,delims)) ;
    
    if(lastChr==NULL){
      lastChr = strdup(chr);
    }else if(strcmp(lastChr,chr)!=0){
      int64_t id=writeAll(vec,lastChr,cfpD);//write data
      write_index(vec.size(),lastChr,fp,id);//write index;
      
      vec.clear();
      free(lastChr);
      lastChr=strdup(chr);
    }
    the_t t;
    t.posi =posi;
    float *the =new float[5];
    for(int i=0;i<5;i++)
      the[i] = atof(strtok(NULL,delims)) ;
    t.vals = the;
    vec.push_back(t);
#if 0
    fprintf(stderr,"%s %d ",chr,posi);
    for(int i=0;i<5;i++)
      fprintf(stderr," %f",the[i]);
    fprintf(stderr,"\n");
#endif
  }
  int64_t id=writeAll(vec,lastChr,cfpD);//write data
  write_index(vec.size(),lastChr,fp,id);//write index;
  vec.clear();
  free(lastChr);
  
  fprintf(stderr,"\tHas dumped files:\n\t\t'%s\'\n\t\t\'%s\'\n",outnames_bin,outnames_idx);
  bgzf_close(cfpD);
  fclose(fp);
  
  gzclose(gfp);
  delete [] buf;
  delete [] outnames_bin; delete [] outnames_idx;
}
Esempio n. 17
0
/*
 * Stress test for BGZF seeking and reading with and without worker threads.
 *
 * A first pass over argv[1] records a mid-file position and the end
 * position. Then, 1000 times, the file is reopened and 80 randomly chosen
 * operations are applied: seek to start/mid/end, read a random amount,
 * sleep briefly, or switch the handle to multi-threaded mode.
 *
 * Exits with status 1 on a usage error, when the file cannot be opened or is
 * too small; aborts when a read or close misbehaves.
 */
int main(int argc, char *argv[]) {
    if (argc <= 1) {
        fprintf(stderr, "Usage: thrash_threads4 input.bam\n");
        exit(1);
    }

    // Find a valid seek location after 100 reads of 64KiB (about 6.5MB of
    // uncompressed data) into the file
    int i;
    ssize_t got;
    BGZF *fpin  = bgzf_open(argv[1], "r");
    if (!fpin) {
        fprintf(stderr, "Failed to open %s\n", argv[1]);
        exit(1);
    }
    uint64_t upos = 0, uend = 0;
    char buf[100000];
    for (i = 0; i < 100; i++) {
        if ((got = bgzf_read(fpin, buf, 65536)) < 0)
            abort();
        upos += got;
    }
    int64_t pos = bgzf_tell(fpin);
    while ((got = bgzf_read(fpin, buf, 65536)) > 0) {
        uend += got;
    }
    if (got < 0) abort();
    int64_t end = bgzf_tell(fpin);
    bgzf_close(fpin);

    // Ensure input is big enough to avoid case 3,4 below going off the end
    // of the file
    if (uend < upos + 10000000) {
        fprintf(stderr, "Please supply a bigger input file\n");
        exit(1);
    }

#define N 1000

    // Spam random seeks & reads
    for (i = 0; i < 1000; i++) {
        printf("i=%d\t", i);
        fpin  = bgzf_open(argv[1], "r");
        if (!fpin) {
            // The file opened fine above; losing it mid-test is fatal.
            fprintf(stderr, "Failed to open %s\n", argv[1]);
            exit(1);
        }
        int j, eof = 0, mt = 0;
        for (j = 0; j < 80; j++) {
            int n = rand() % 7;
            putchar('0'+n); fflush(stdout);
            switch (n) {
            case 0: // start
                if (bgzf_seek(fpin, 0LL, SEEK_SET) < 0) puts("!");//abort();
                eof = 0;
                break;
            case 1: // mid
                if (bgzf_seek(fpin, pos, SEEK_SET) < 0) puts("!");//abort();
                eof = 0;
                break;
            case 2: // eof
                if (bgzf_seek(fpin, end, SEEK_SET) < 0) puts("!");//abort();
                eof = 1;
                break;
            case 3: case 4: {
                // long (case 3) or short (case 4) read; at EOF it must return 0
                int l = rand()%(n==3?100000:100);
                if (bgzf_read(fpin, buf, l) != l*(1-eof)) abort();
                break;
            }
            case 5:
                usleep(N);
                break;
            case 6:
                // switch to multi-threaded mode at most once per handle
                if (!mt)
                    bgzf_mt(fpin, 8, 256);
                mt = 1;
                break;
            }
        }
        printf("\n");
        if (bgzf_close(fpin))
            abort();
    }

    return 0;
}
Esempio n. 18
0
// Writes the entries that pass the filters in params as a BCF stream.
//
// Output goes to "<output_prefix>.recode.bcf", or to file descriptor 1 when
// params.stream_out is set. The stream starts with the 5 byte magic
// "BCF\2\2", followed by the length of the header text and the text itself:
// all meta-data lines, then the column line naming the included individuals,
// terminated by a NUL byte. N_entries and N_kept_entries are updated while
// the entries are processed.
void bcf_file::print_bcf(const parameters &params)
{
	LOG.printLOG("Outputting BCF file...\n");

	BGZF * out;
	if(params.stream_out)
		out = bgzf_dopen(1, "w");
	else
	{
		string output_file = params.output_prefix + ".recode.bcf";
		out = bgzf_open(output_file.c_str(), "w");
	}

	char magic[5] = {'B','C','F','\2','\2'};
	bgzf_write(out, magic, 5);

	// Header text: every meta-data line followed by a newline.
	vector<char> header;
	for (unsigned int line=0; line<meta_data.lines.size(); line++)
	{
		for (unsigned int pos=0; pos<meta_data.lines[line].length(); pos++)
			header.push_back( meta_data.lines[line][pos] );
		header.push_back('\n');
	}

	// Column line: fixed columns, FORMAT when there are individuals, then
	// one column per included individual.
	string header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
	if (meta_data.N_indv > 0)
		header_str += "\tFORMAT";
	for (unsigned int indv=0; indv<meta_data.N_indv; indv++)
	{
		if (!include_indv[indv])
			continue;
		header_str += "\t";
		header_str += meta_data.indv[indv];
	}
	header_str += "\n";

	header.insert(header.end(), header_str.begin(), header_str.end());
	header.push_back( '\0' );

	// Length first, then the text it describes.
	uint32_t len_text = header.size();
	bgzf_write(out, (char *)&len_text, sizeof(len_text) );
	bgzf_write(out, (char *)&header[0], len_text );

	vector<char> variant_line;
	entry * e = new bcf_entry(meta_data, include_indv);
	while(!eof())
	{
		get_entry(variant_line);
		e->reset(variant_line);
		N_entries += e->apply_filters(params);
		if(e->passed_filters)
		{
			N_kept_entries++;
			e->parse_basic_entry(true, true, true);
			e->parse_full_entry(true);
			e->parse_genotype_entries(true);
			e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO);
		}
	}
	delete e;
	bgzf_close(out);
}
Esempio n. 19
0
void vcf_file::print_bcf(const parameters &params)
{
	LOG.printLOG("Outputting BCF file...\n");
	BGZF * out;
	if(!params.stream_out)
	{
		string output_file = params.output_prefix + ".recode.bcf";
		out = bgzf_open(output_file.c_str(), "w");
	}
	else
		out = bgzf_dopen(1, "w");

	string header_str;
	uint32_t len_text = 0;
	vector<char> header;

	char magic[5] = {'B','C','F','\2', '\1'};
	bgzf_write(out, magic, 5);

	if (meta_data.has_idx)
	{
		LOG.warning("VCF file contains IDX values in header. These are being removed for conversion to BCF.");
		meta_data.reprint();
		meta_data.reparse();
	}
	for (unsigned int ui=0; ui<meta_data.lines.size(); ui++)
	{
		for (unsigned int uj=0; uj<meta_data.lines[ui].length(); uj++)
			header.push_back( meta_data.lines[ui][uj] );
		header.push_back('\n');
	}

	if (meta_data.has_contigs == false)
	{
		vector<string> contig_vector;
		get_contigs(params.contigs_file, contig_vector);

		for(unsigned int ui=0; ui<contig_vector.size(); ui++)
		{
			meta_data.add_CONTIG_descriptor(contig_vector[ui].substr(10, contig_vector[ui].size()-8),int(ui));
			for(unsigned int uj=0; uj<contig_vector[ui].size(); uj++)
				header.push_back(contig_vector[ui][uj]);
			header.push_back('\n');
		}
	}

	header_str = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
	if (meta_data.N_indv > 0)
		header_str += "\tFORMAT";

	for (unsigned int ui=0; ui<meta_data.N_indv; ui++)
		if (include_indv[ui])
		{
			header_str += "\t";
			header_str += meta_data.indv[ui];
		}
	header_str += "\n";

	for (unsigned int ui=0; ui<header_str.length(); ui++)
		header.push_back( header_str[ui] );

	header.push_back( '\0' );
	len_text = header.size();

	bgzf_write(out, (char *)&len_text, sizeof(len_text) );
	bgzf_write(out, (char *)&header[0], len_text );

	vector<char> variant_line;
	entry * e = new vcf_entry(meta_data, include_indv);
	while(!eof())
	{
		get_entry(variant_line);
		e->reset(variant_line);
		N_entries += e->apply_filters(params);
		if(!e->passed_filters)
			continue;
		N_kept_entries++;
		e->parse_basic_entry(true, true, true);
		e->parse_full_entry(true);
		e->parse_genotype_entries(true,true,true,true);
		e->print_bcf(out, params.recode_INFO_to_keep, params.recode_all_INFO);
	}
	delete e;
	bgzf_close(out);
}
Esempio n. 20
0
// Load the index file fname and open the two data files that belong to it.
//
// The index is read as: 8 leading bytes, then repeated entries of
// (size_t name length, name bytes, size_t nSites, int64_t pos, int64_t saf).
// Each entry is stored in ret->mm under its chromosome name and its nSites is
// added to ret->nSites. The data file names are derived from fname by
// dropping its last three characters and appending "gz" and "pos.gz";
// NOTE(review): the log messages suggest fname ends in ".psmc.idx" - confirm.
//
// Every problem is reported on stderr and terminates the process with
// exit(0), following the convention already used in this function. The data
// files are allowed to be missing, in which case the handles stay NULL.
perpsmc * perpsmc_init(char *fname){
  perpsmc *ret = new perpsmc ;
  ret->fname = strdup(fname);
  ret->gls =NULL;
  ret->pos = NULL;
  ret->bgzf_pos=ret->bgzf_gls=NULL;
  size_t clen;
  if(!fexists(fname)){
    fprintf(stderr,"\t-> Problem opening file: \'%s\'\n",fname);
    exit(0);
  }
  FILE *fp = NULL;
  fp=fopen(fname,"r");
  if(fp==NULL){
    fprintf(stderr,"\t-> Problem opening file:%s\n",fname);
    exit(0);
  }
  // Skip the 8 leading bytes. The read is done outside of assert() so that
  // it still happens when assertions are compiled out.
  char buf[8];
  if(fread(buf,1,8,fp)!=8){
    fprintf(stderr,"\t-> Problem reading header of file: \'%s\'\n",fname);
    exit(0);
  }
  ret->version = psmcversion(fname);
  fprintf(stderr,"\t-> Version of fname: \'%s\' is:%d\n",fname,ret->version);
  if(ret->version!=1){
    fprintf(stderr,"\t-> Looks like you are trying to use a version of PSMC that does not exists\n");
    exit(0);
  }
  ret->nSites =0;
  while(fread(&clen,sizeof(size_t),1,fp)){
    // The name is kept as the map key, so it is not freed here.
    char *chr = (char*)malloc(clen+1);
    if(chr==NULL||clen!=fread(chr,1,clen,fp)){
      fprintf(stderr,"[%s.%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
    chr[clen] = '\0';
    
    datum d;
    if(1!=fread(&d.nSites,sizeof(size_t),1,fp)){
      fprintf(stderr,"[%s.%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
    ret->nSites += d.nSites;
    if(1!=fread(&d.pos,sizeof(int64_t),1,fp)){
      fprintf(stderr,"[%s->%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
    if(1!=fread(&d.saf,sizeof(int64_t),1,fp)){
      fprintf(stderr,"[%s->%s():%d] Problem reading data: %s \n",__FILE__,__FUNCTION__,__LINE__,fname);
      exit(0);
    }
  
    myMap::iterator it = ret->mm.find(chr);
    if(it==ret->mm.end())
      ret->mm[chr] =d ;
    else{
      fprintf(stderr,"Problem with chr: %s, key already exists, psmc file needs to be sorted. (sort your -rf that you used for input)\n",chr);
      exit(0);
    }
  }
  fclose(fp);

  // Build the common prefix of the data file names: fname without its last
  // three characters. A shorter name would make the length wrap around.
  size_t flen = strlen(fname);
  if(flen<3){
    fprintf(stderr,"\t-> Problem with filename: \'%s\', too short to derive data filenames\n",fname);
    exit(0);
  }
  char *tmp =(char*)calloc(flen+100,1);//zero filled, so the copy below stays terminated
  tmp=strncpy(tmp,fname,flen-3);
  
  char *tmp2 = (char*)calloc(flen+100,1);
  snprintf(tmp2,flen+100,"%sgz",tmp);
  fprintf(stderr,"\t-> Assuming .psmc.gz file: %s\n",tmp2);
  ret->bgzf_gls = bgzf_open(tmp2,"r");
  if(ret->bgzf_gls)
    my_bgzf_seek(ret->bgzf_gls,8,SEEK_SET);
  if(ret->bgzf_gls && ret->version!=psmcversion(tmp2)){
    fprintf(stderr,"\t-> Problem with mismatch of version of %s vs %s %d vs %d\n",fname,tmp2,ret->version,psmcversion(tmp2));
    exit(0);
  }

  snprintf(tmp2,flen+100,"%spos.gz",tmp);
  fprintf(stderr,"\t-> Assuming .psmc.pos.gz: %s\n",tmp2);
  ret->bgzf_pos = bgzf_open(tmp2,"r");
  // Test the handle that was just opened. The previous test looked at
  // ret->pos, which is still NULL here, so this seek never ran.
  if(ret->bgzf_pos)
    my_bgzf_seek(ret->bgzf_pos,8,SEEK_SET);
  if(ret->bgzf_pos&& ret->version!=psmcversion(tmp2)){
    fprintf(stderr,"Problem with mismatch of version of %s vs %s\n",fname,tmp2);
    exit(0);
  }
  free(tmp);free(tmp2);
  
 return ret;
 }
Esempio n. 21
0
/*
 * "bcftools tabix [options] <in.gz> [reg1 [...]]"
 *
 * Three modes, chosen from the arguments:
 *   -a given              print every line of the file, no index involved
 *   no region given       build the index (.tbi, or .csi when -m > 0)
 *   region(s) given       load the index and print the lines of each region
 *
 * When no -p preset is given for indexing, the preset is guessed from a
 * .gff.gz/.bed.gz/.sam.gz/.vcf.gz file name suffix. An existing index is
 * only overwritten with -f.
 *
 * Returns 0 on success and 1 on usage errors or any failure.
 */
int main_tabix(int argc, char *argv[])
{
    int c, min_shift = -1, is_force = 0, is_all = 0;
    tbx_conf_t conf = tbx_conf_gff, *conf_ptr = NULL;
    while ((c = getopt(argc, argv, "0fap:s:b:e:S:c:m:")) >= 0)
        if (c == '0') conf.preset |= TBX_UCSC;
        else if (c == 'f') is_force = 1;
        else if (c == 'a') is_all = 1;
        else if (c == 'm') min_shift = atoi(optarg);
        else if (c == 's') conf.sc = atoi(optarg);
        else if (c == 'b') conf.bc = atoi(optarg);
        else if (c == 'e') conf.ec = atoi(optarg);
        else if (c == 'c') conf.meta_char = *optarg;
        else if (c == 'S') conf.line_skip = atoi(optarg);
        else if (c == 'p') {
            if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff;
            else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed;
            else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;
            else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;
            else {
                fprintf(stderr, "The type '%s' not recognised\n", optarg);
                return 1;
            }

        }
    if (optind == argc) {
        fprintf(stderr, "\nUsage: bcftools tabix [options] <in.gz> [reg1 [...]]\n\n");
        fprintf(stderr, "Options: -p STR    preset: gff, bed, sam or vcf [gff]\n");
        fprintf(stderr, "         -s INT    column number for sequence names (suppressed by -p) [1]\n");
        fprintf(stderr, "         -b INT    column number for region start [4]\n");
        fprintf(stderr, "         -e INT    column number for region end (if no end, set INT to -b) [5]\n");
        fprintf(stderr, "         -0        specify coordinates are zero-based\n");
        fprintf(stderr, "         -S INT    skip first INT lines [0]\n");
        fprintf(stderr, "         -c CHAR   skip lines starting with CHAR [null]\n");
        fprintf(stderr, "         -a        print all records\n");
        fprintf(stderr, "         -f        force to overwrite existing index\n");
        fprintf(stderr, "         -m INT    set the minimal interval size to 1<<INT; 0 for the old tabix index [0]\n");
        fprintf(stderr, "\n");
        return 1;
    }
    if (is_all) { // read without random access
        kstring_t s;
        BGZF *fp;
        s.l = s.m = 0; s.s = 0;
        fp = bgzf_open(argv[optind], "r");
        if (fp == 0) {
            // Without this check bgzf_getline() would run on a null handle.
            fprintf(stderr, "[E::%s] fail to open file %s\n", __func__, argv[optind]);
            return 1;
        }
        while (bgzf_getline(fp, '\n', &s) >= 0) puts(s.s);
        bgzf_close(fp);
        free(s.s);
    } else if (optind + 2 > argc) { // create index
        if ( !conf_ptr )
        {
            // auto-detect file type by file name
            int l = strlen(argv[optind]);
            int strcasecmp(const char *s1, const char *s2);
            if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf_ptr = &tbx_conf_gff;
            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf_ptr = &tbx_conf_bed;
            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf_ptr = &tbx_conf_sam;
            else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf_ptr = &tbx_conf_vcf;
        }
        // a preset, given or detected, replaces the whole configuration
        if ( conf_ptr ) conf = *conf_ptr;

        if (!is_force) {
            // refuse to overwrite an index that is already there
            char *fn;
            FILE *fp;
            fn = (char*)alloca(strlen(argv[optind]) + 5);
            strcat(strcpy(fn, argv[optind]), min_shift <= 0? ".tbi" : ".csi");
            if ((fp = fopen(fn, "rb")) != 0) {
                fclose(fp);
                fprintf(stderr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__);
                return 1;
            }
        }
        if ( tbx_index_build(argv[optind], min_shift, &conf) )
        {
            fprintf(stderr,"tbx_index_build failed: Is the file bgzip-compressed? Was wrong -p [type] option used?\n");
            return 1;
        }
    } else { // read with random access
        tbx_t *tbx;
        BGZF *fp;
        kstring_t s;
        int i;
        if ((tbx = tbx_index_load(argv[optind])) == 0) return 1;
        if ((fp = bgzf_open(argv[optind], "r")) == 0) {
            // do not leave the loaded index behind
            tbx_destroy(tbx);
            return 1;
        }
        s.s = 0; s.l = s.m = 0;
        for (i = optind + 1; i < argc; ++i) {
            hts_itr_t *itr;
            // a region that cannot be resolved is skipped silently
            if ((itr = tbx_itr_querys(tbx, argv[i])) == 0) continue;
            while (tbx_bgzf_itr_next(fp, tbx, itr, &s) >= 0) puts(s.s);
            tbx_itr_destroy(itr);
        }
        free(s.s);
        bgzf_close(fp);
        tbx_destroy(tbx);
    }
    return 0;
}
Esempio n. 22
0
/*
 * Concatenate the BGZF-compressed BCF files listed in args->fnames into
 * args->output_fname without recompressing the record data.
 *
 * For every input the first BGZF block is decoded so that the BCF magic and
 * the header text can be read; the header is written to the output only for
 * the first file.  Record data that shared that first block is written
 * through the compressing writer, and everything after it is copied raw.
 * The last 28 bytes of each input are held back so that a trailing BGZF EOF
 * block can be dropped instead of being copied into the middle of the output.
 *
 * All failures are reported through error(), which is assumed not to return
 * (the code dereferences handles right after such checks).
 */
static void naive_concat(args_t *args)
{
    // only compressed BCF atm
    BGZF *bgzf_out = bgzf_open(args->output_fname,"w");
    if ( !bgzf_out ) error("Failed to open: %s\n", args->output_fname);

    const size_t page_size = 32768;
    char *buf = (char*) malloc(page_size);
    if ( !buf ) error("Failed to allocate %d bytes\n", (int)page_size);
    kstring_t tmp = {0,0,0};
    int i;
    for (i=0; i<args->nfnames; i++)
    {
        htsFile *hts_fp = hts_open(args->fnames[i],"r");
        if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
        htsFormat type = *hts_get_format(hts_fp);

        if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
        if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");

        BGZF *fp = hts_get_bgzfp(hts_fp);
        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
            error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));

        uint8_t magic[5];
        if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);

        // The header length is a 4-byte field: read it into a 4-byte variable
        // instead of overwriting only part of the wider size_t tmp.l.
        uint32_t hlen = 0;
        if ( bgzf_read(fp, &hlen, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
        tmp.l = hlen;
        hts_expand(char,tmp.l,tmp.m,tmp.s);
        if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);

        // write only the first header
        if ( i==0 )
        {
            if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
            if ( bgzf_write(bgzf_out, &hlen, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
            if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", (int)tmp.l,args->output_fname);
        }

        // Output all non-header data that were read together with the header block
        int nskip = fp->block_offset;
        if ( fp->block_length - nskip > 0 )
        {
            // the write targets bgzf_out, so that is the handle whose error code is reported
            if ( bgzf_write(bgzf_out, (char*)fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",bgzf_out->errcode);
        }
        if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);


        // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
        ssize_t nread, ncached = 0, nwr;
        const int neof = 28;
        char cached[neof];
        while (1)
        {
            nread = bgzf_raw_read(fp, buf, page_size);

            // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
            if ( nread<=0 ) break;
            if ( nread<=neof )      // last block
            {
                // NOTE(review): when nothing has been cached yet (ncached==0) the bytes
                // read here are neither written nor checked below - confirm this is intended.
                if ( ncached )
                {
                    // flush the part of the cache that won't be needed
                    nwr = bgzf_raw_write(bgzf_out, cached, nread);
                    if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);

                    // make space in the cache so that we can append to the end
                    if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
                }

                // fill the cache and check for eof outside this loop
                memcpy(cached+neof-nread,buf,nread);
                break;
            }

            // not the last block, flush the cache if full
            if ( ncached )
            {
                nwr = bgzf_raw_write(bgzf_out, cached, ncached);
                if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)ncached);
                ncached = 0;
            }

            // fill the cache
            nread -= neof;
            memcpy(cached,buf+nread,neof);
            ncached = neof;

            nwr = bgzf_raw_write(bgzf_out, buf, nread);
            if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)nread);
        }
        // The held-back tail is copied only if it differs from the 28-byte BGZF EOF block
        if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
        {
            nwr = bgzf_raw_write(bgzf_out, cached, neof);
            if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", (int)nwr,(int)neof);
        }
        if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
    }
    free(buf);
    free(tmp.s);
    // The handle is not dereferenced after bgzf_close(): whether it is still
    // valid after a failed close depends on the htslib version in use.
    if (bgzf_close(bgzf_out) < 0) error("Error: failed to close %s\n", args->output_fname);
}
Esempio n. 23
0
void signalFromBAM(const string bamFileName, const string sigFileName, Parameters P) {

    bam1_t *bamA;
    bamA=bam_init1();

    double nMult=0, nUniq=0;

    if (P.outWigFlags.norm==1) {//count reads in the BAM file
        BGZF *bamIn=bgzf_open(bamFileName.c_str(),"r");
        bam_hdr_t *bamHeader=bam_hdr_read(bamIn);
        while ( true ) {//until the end of file
            int bamBytes1=bam_read1(bamIn, bamA);
            if (bamBytes1<0) break; //end of file
            if (bamA->core.tid<0) continue; //unmapped read
//             if ( !std::regex_match(chrName.at(bamA->core.tid),std::regex(P.outWigReferencesPrefix))) continue; //reference does not mathc required references
            if ( P.outWigReferencesPrefix!="-" && (P.outWigReferencesPrefix.compare(0,P.outWigReferencesPrefix.size(),bamHeader->target_name[bamA->core.tid],P.outWigReferencesPrefix.size())!=0) ) continue; //reference does not match required references

            uint8_t* aNHp=bam_aux_get(bamA,"NH");
            if (aNHp!=NULL) {
                uint32_t aNH=bam_aux2i(aNHp);
                if (aNH==1) {//unique mappers
                    ++nUniq;
                } else if (aNH>1) {
                    nMult+=1.0/aNH;
                };
            };
        };
        bgzf_close(bamIn);
    };

    BGZF *bamIn=bgzf_open(bamFileName.c_str(),"r");
    bam_hdr_t *bamHeader=bam_hdr_read(bamIn);

    int sigN=P.outWigFlags.strand ? 4 : 2;

    double *normFactor=new double[sigN];

    ofstream **sigOutAll=new ofstream* [sigN];

    string* sigOutFileName=new string[sigN];
    sigOutFileName[0]=sigFileName+".Unique.str1.out";
    sigOutFileName[1]=sigFileName+".UniqueMultiple.str1.out";
    if (P.outWigFlags.strand) {
        sigOutFileName[2]=sigFileName+".Unique.str2.out";
        sigOutFileName[3]=sigFileName+".UniqueMultiple.str2.out";
    };

    for (int ii=0; ii<sigN; ii++) {
        sigOutFileName[ii]+= (P.outWigFlags.format==0 ? ".bg" : ".wig");
        sigOutAll[ii]=new ofstream ( sigOutFileName[ii].c_str() );
    };

    if (P.outWigFlags.norm==0) {//raw counts
        normFactor[0]=1;
        normFactor[1]=1;
    } else if (P.outWigFlags.norm==1) {//normlaized
        normFactor[0]=1.0e6 / nUniq;
        normFactor[1]=1.0e6 / (nUniq+nMult);
        for (int is=0;is<sigN;is++) {//formatting double output
            *sigOutAll[is]<<setiosflags(ios::fixed) << setprecision(5);
        };
    };
    if (P.outWigFlags.strand) {
        normFactor[2]=normFactor[0];
        normFactor[3]=normFactor[1];
    };


    int iChr=-999;
    double *sigAll=NULL;
    uint32_t chrLen=0;
    while ( true ) {//until the end of file
        int bamBytes1=bam_read1(bamIn, bamA);
        if (bamA->core.tid!=iChr || bamBytes1<0) {
            //output to file
            if (iChr!=-999) {//iChr=-999 marks chromosomes that are not output, including unmapped reads
                for (int is=0;is<sigN;is++) {
                    if (P.outWigFlags.format==1) {
                        *sigOutAll[is] <<"variableStep chrom="<<bamHeader->target_name[iChr] <<"\n";
                    };
                    double prevSig=0;
                    for (uint32_t ig=0;ig<chrLen;ig++) {
                        double newSig=sigAll[sigN*ig+is];
                        if (P.outWigFlags.format==0) {//bedGraph
                            if (newSig!=prevSig) {
                                if (prevSig!=0) {//finish previous record
                                    *sigOutAll[is] <<ig<<"\t"<<prevSig*normFactor[is] <<"\n"; //1-based end
                                };
                                if (newSig!=0) {
                                    *sigOutAll[is] << bamHeader->target_name[iChr] <<"\t"<< ig <<"\t"; //0-based beginning
                                };
                                prevSig=newSig;
                            };
                        } else if (P.outWigFlags.format==1){//wiggle
                            if (newSig!=0) {
                                *sigOutAll[is] <<ig+1<<"\t"<<newSig*normFactor[is] <<"\n";
                            };
                        };
                    };
                };
            };
            if (bamBytes1<0) {//no more reads
                break;
            };

            iChr=bamA->core.tid;
            if ( iChr==-1 || (P.outWigReferencesPrefix!="-" && (P.outWigReferencesPrefix.compare(0,P.outWigReferencesPrefix.size(),bamHeader->target_name[bamA->core.tid],P.outWigReferencesPrefix.size())!=0) ) ) {
                iChr=-999;
                continue; //reference does not match required references
            };

            chrLen=bamHeader->target_len[iChr]+1;//one extra base at the end which sohuld always be 0
            delete [] sigAll;
            sigAll= new double[sigN*chrLen];
            memset(sigAll, 0, sizeof(*sigAll)*sigN*chrLen);
        };

//         uint32_t nCigar =(bamA->core.flag<<16)>>16;
//         uint32_t mapFlag=bamA->core.flag>>16;
//         uint32_t mapQ=(bamA->core.flag<<16)>>24;

        #define BAM_CIGAR_OperationShift 4
        #define BAM_CIGAR_LengthBits 28
        #define BAM_CIGAR_M 0
        #define BAM_CIGAR_I 1
        #define BAM_CIGAR_D 2
        #define BAM_CIGAR_N 3
        #define BAM_CIGAR_S 4
        #define BAM_CIGAR_H 5
        #define BAM_CIGAR_P 6
        #define BAM_CIGAR_EQ 7
        #define BAM_CIGAR_X 8

        //by default, alignments marked as duplicate are not processed
        if ( (bamA->core.flag & 0x400) > 0 ) continue;

        //NH attribute
        uint8_t* aNHp=bam_aux_get(bamA,"NH");
        uint32_t aNH;
        if (aNHp==NULL) {
            aNH=1; //no NH tag: assume NH=1
            //continue; //do not process lines without NH field
        } else {
            aNH=bam_aux2i(bam_aux_get(bamA,"NH")); //write a safer function allowing for lacking NH tag
        };
        if (aNH==0) continue; //do not process lines without NH=0
        uint32_t aG=bamA->core.pos;
        uint32_t iStrand=0;
        if (P.outWigFlags.strand) {//strand for stranded data from SAM flag
            iStrand= ( (bamA->core.flag & 0x10) > 0 ) == ( (bamA->core.flag & 0x80) == 0 );//0/1 for +/-
        };
        if (P.outWigFlags.type==1) {//5' of the1st read signal only, RAMPAGE/CAGE
            if ( (bamA->core.flag & 0x80)>0) continue; //skip if this the second mate
            if (iStrand==0) {
                if (aNH==1) {//unique mappers
                    sigAll[aG*sigN+0+2*iStrand]++;
                };
                sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci
                continue; //record only the first position
            };
        };

        uint32_t* cigar=(uint32_t*) (bamA->data+bamA->core.l_qname);

        for (uint32_t ic=0; ic<bamA->core.n_cigar; ic++) {
            uint32_t cigOp=(cigar[ic]<<BAM_CIGAR_LengthBits)>>BAM_CIGAR_LengthBits;
            uint32_t cigL=cigar[ic]>>BAM_CIGAR_OperationShift;
            switch (cigOp) {
                case(BAM_CIGAR_D):
                case(BAM_CIGAR_N):
                    aG+=cigL;
                    break;
                case(BAM_CIGAR_M):
                    if (P.outWigFlags.type==0 || (P.outWigFlags.type==2 && (bamA->core.flag & 0x80)>0 )) {//full signal, or second mate onyl signal
                        for (uint32_t ig=0;ig<cigL;ig++) {
                            if (aG>=chrLen) {
                                cerr << "BUG: alignment extends past chromosome in signalFromBAM.cpp\n";
                                exit(-1);
                            };
                            if (aNH==1) {//unique mappers
                                sigAll[aG*sigN+0+2*iStrand]++;
                            };
                            sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci
                            aG++;
                        };
                    } else {
                        aG+=cigL;
                    };
            };
        };
        if (P.outWigFlags.type==1) {//full signal
            --aG;
            if (aNH==1) {//unique mappers
                sigAll[aG*sigN+0+2*iStrand]++;
            };
            sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci
        };
    };
    delete [] sigAll;

    for (int is=0; is<sigN; is++) {// flush/close all signal files
        sigOutAll[is]->flush();
        sigOutAll[is]->close();
    };
};
Esempio n. 24
0
/*
 * Concatenate nfn BAM files into outbam ("-" selects stdout) by copying their
 * compressed blocks without recompression.
 *
 *   nfn, fn  - number and names of the inputs ("-" selects stdin)
 *   h        - header to write; when NULL the header of the first input is used
 *   outbam   - output file name
 *
 * The last BGZF_EMPTY_BLOCK_SIZE bytes of every input are held back in ebuf
 * so that a trailing empty block is not copied into the middle of the output;
 * they are written only when they do not look like an empty gzip block.
 *
 * Returns 0 on success, 1 when the output cannot be opened and -1 on any
 * other failure.  All resources acquired here are released on every path
 * except the early return for an unopenable output.
 */
int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
{
    BGZF *fp;
    FILE* fp_file;
    uint8_t *buf;
    uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
    const int es=BGZF_EMPTY_BLOCK_SIZE;
    int i, ret = 0;
    
    fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(_fileno(stdout), "w");
    if (fp == 0) {
        fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __FUNCTION__, outbam);
        return 1;
    }
    if (h) bam_header_write(fp, h);
    
    buf = (uint8_t*) malloc(BUF_SIZE);
    if (buf == 0) {
        fprintf(stderr, "[%s] ERROR: fail to allocate the copy buffer.\n", __FUNCTION__);
        bgzf_close(fp);
        return -1;
    }
    for(i = 0; ret == 0 && i < nfn; ++i){
        BGZF *in;
        bam_header_t *old;
        int len,j;
        
        in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(_fileno(stdin), "r");
        if (in == 0) {
            fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __FUNCTION__, fn[i]);
            ret = -1;
            break;
        }
        if (in->open_mode != 'r') {
            bgzf_close(in);
            ret = -1;
            break;
        }
        
        old = bam_header_read(in);
        if (old == 0) {
            fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __FUNCTION__, fn[i]);
            bgzf_close(in);
            ret = -1;
            break;
        }
        if (h == 0 && i == 0) bam_header_write(fp, old);
        
        /* records that were decompressed together with the header go through the compressing writer */
        if (in->block_offset < in->block_length) {
            bgzf_write(fp, (uint8_t*)in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
            bgzf_flush(fp);
        }
        
        j=0; /* becomes 1 once ebuf holds es bytes of input */
#ifdef _USE_KNETFILE
        fp_file=fp->x.fpw;
        while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) {
#else  
        fp_file=fp->file;
        while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) {
#endif
            if(len<es){
                int diff=es-len;
                if(j==0) {
                    fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __FUNCTION__, fn[i]);
                    ret = -1;
                    break;
                }
                fwrite(ebuf, 1, len, fp_file);
                /* source and destination overlap inside ebuf, so memmove is required */
                memmove(ebuf,ebuf+len,diff);
                memcpy(ebuf+diff,buf,len);
            } else {
                if(j!=0) fwrite(ebuf, 1, es, fp_file);
                len-= es;
                memcpy(ebuf,buf+len,es);
                fwrite(buf, 1, len, fp_file);
            }
            j=1;
        }

        /* check final gzip block */
        if (ret == 0) {
            if (j == 0) {
                /* nothing was read after the header, so ebuf holds no data to inspect or write */
                fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __FUNCTION__, fn[i]);
                fprintf(stderr, " Possible output corruption.\n");
            } else {
                const uint8_t gzip1=ebuf[0];
                const uint8_t gzip2=ebuf[1];
                uint32_t isize;
                /* copy the trailing size field out instead of casting a possibly misaligned pointer */
                memcpy(&isize, ebuf+es-4, sizeof(isize));
                if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
                    fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __FUNCTION__, fn[i]);
                    fprintf(stderr, " Possible output corruption.\n");
                    fwrite(ebuf, 1, es, fp_file);
                }
            }
        }
        bam_header_destroy(old);
        bgzf_close(in);
    }
    free(buf);
    bgzf_close(fp);
    return ret;
}



/*
 * Command-line entry point of "samtools cat".
 *
 * Options:
 *   -h FILE  read the output header from a SAM file
 *   -o FILE  write to FILE instead of stdout
 * At least two input BAM files must follow the options.
 *
 * Returns 1 on a usage error or when the header file cannot be opened,
 * otherwise the value returned by bam_cat().
 */
int main_cat(int argc, char *argv[])
{
    bam_header_t *h = 0;
    char *outfn = 0;
    int c, ret;
    while ((c = getopt(argc, argv, "h:o:")) >= 0) {
        switch (c) {
            case 'h': {
                tamFile fph = sam_open(optarg);
                if (fph == 0) {
                    /* report the file that was actually requested, not argv[1] */
                    fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __FUNCTION__, optarg);
                    free(outfn);
                    return 1;
                }
                h = sam_header_read(fph);
                sam_close(fph);
                break;
            }
            case 'o':
                /* a repeated -o replaces the earlier name */
                free(outfn);
                outfn = strdup(optarg);
                break;
        }
    }
    if (argc - optind < 2) {
        fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n");
        free(outfn);
        return 1;
    }
    /* NOTE(review): the header read for -h is not released in this function */
    ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
    free(outfn);
    return ret;
}
Esempio n. 25
0
File: tabix.c Progetto: Illumina/akt
/*
 * Replace the header of a BGZF-compressed text file and write the result to
 * the "-" output stream.
 *
 *   fname  - input file
 *   header - plain-text file holding the new header
 *   ftype  - file type flags; only text files (IS_TXT set, or 0) are supported
 *   conf   - supplies meta_char, the character that starts a header line
 *
 * The old header is skipped block by block, the new header is written through
 * the compressing writer together with the rest of the block in which the old
 * header ended, and all following blocks are copied without recompression.
 *
 * Returns 0 on success and -1 when the input cannot be opened or its first
 * block cannot be read; other failures are reported through error().
 */
int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf)
{
    if ( ftype & IS_TXT || !ftype )
    {
        BGZF *fp = bgzf_open(fname,"r");
        if ( !fp ) return -1;
        if ( bgzf_read_block(fp) != 0 || !fp->block_length )
        {
            bgzf_close(fp);     // do not leak the handle on the early return
            return -1;
        }

        char *buffer = (char*) fp->uncompressed_block;
        int skip_until = 0;

        // Skip the header: find out the position of the data block
        if ( buffer[0]==conf->meta_char )
        {
            skip_until = 1;
            while (1)
            {
                if ( buffer[skip_until]=='\n' )
                {
                    skip_until++;
                    if ( skip_until>=fp->block_length )
                    {
                        if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname);
                        skip_until = 0;
                    }
                    // The header has finished
                    if ( buffer[skip_until]!=conf->meta_char ) break;
                }
                skip_until++;
                if ( skip_until>=fp->block_length )
                {
                    if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", fname);
                    skip_until = 0;
                }
            }
        }

        // Output the new header
        FILE *hdr  = fopen(header,"r");
        if ( !hdr ) error("%s: %s", header,strerror(errno));
        const size_t page_size = 32768;
        char *buf = (char*) malloc(page_size);
        if ( !buf ) error("Failed to allocate %d bytes\n", (int)page_size);
        BGZF *bgzf_out = bgzf_open("-", "w");
        if ( !bgzf_out ) error("Failed to open the output stream \"-\"\n");
        ssize_t nread;
        // one byte of the buffer is kept free so that a missing final newline can be appended
        while ( (nread=fread(buf,1,page_size-1,hdr))>0 )
        {
            if ( nread<page_size-1 && buf[nread-1]!='\n' ) buf[nread++] = '\n';
            if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode);
        }
        if ( fclose(hdr) ) error("close failed: %s\n", header);

        // Output all remaining data read with the header block
        if ( fp->block_length - skip_until > 0 )
        {
            // the write targets bgzf_out, so that is the handle whose error code is reported
            if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error("Error: %d\n",bgzf_out->errcode);
        }
        if (bgzf_flush(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);

        while (1)
        {
            nread = bgzf_raw_read(fp, buf, page_size);
            if ( nread<=0 ) break;

            int count = bgzf_raw_write(bgzf_out, buf, nread);
            if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
        }
        // The handles are not dereferenced after bgzf_close(): whether they are
        // still valid after a failed close depends on the htslib version in use.
        if (bgzf_close(bgzf_out) < 0) error("Error: failed to close the output stream\n");
        if (bgzf_close(fp) < 0) error("Error: failed to close %s\n", fname);
        free(buf);
    }
    else
        error("todo: reheader BCF, BAM\n");  // BCF is difficult, records contain pointers to the header.
    return 0;
}
Esempio n. 26
0
File: main.c Progetto: Brainiarc7/TS
/*
 * Replace the header of a BGZF-compressed text file and write the result to
 * stdout.
 *
 *   header - plain-text file holding the new header
 *   file   - BGZF-compressed input
 *   meta   - character that starts a header line in the input
 *
 * The old header is skipped block by block, the new header plus the rest of
 * the block in which the old header ended are written through the compressing
 * writer, and the remaining compressed bytes are copied verbatim.
 *
 * Returns 0 on success and -1 when the input cannot be opened or its first
 * block cannot be read; other failures are reported through error().
 */
int reheader_file(const char *header, const char *file, int meta)
{
    BGZF *fp = bgzf_open(file,"r");
    if ( !fp )
        return -1;
    if (bgzf_read_block(fp) != 0 || !fp->block_length)
    {
        bgzf_close(fp);     // do not leak the handle on the early return
        return -1;
    }
    
    char *buffer = (char*) fp->uncompressed_block;
    int skip_until = 0;

    if ( buffer[0]==meta )
    {
        skip_until = 1;

        // Skip the header
        while (1)
        {
            if ( buffer[skip_until]=='\n' )
            {
                skip_until++;
                if ( skip_until>=fp->block_length )
                {
                    if (bgzf_read_block(fp) != 0 || !fp->block_length)
                        error("no body?\n");
                    skip_until = 0;
                }
                // The header has finished
                if ( buffer[skip_until]!=meta ) break;
            }
            skip_until++;
            if ( skip_until>=fp->block_length )
            {
                if (bgzf_read_block(fp) != 0 || !fp->block_length)
                    error("no body?\n");
                skip_until = 0;
            }
        }
    }

    FILE *fh = fopen(header,"r");
    if ( !fh )
        error("%s: %s", header,strerror(errno));
    int page_size = getpagesize();
    char *buf = (char*) valloc(page_size);
    if ( !buf )
        error("Could not allocate %d bytes\n", page_size);
    BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
    if ( !bgzf_out )
        error("Could not open stdout for writing\n");
    ssize_t nread;
    // one byte of the buffer is kept free so that a missing final newline can be appended
    while ( (nread=fread(buf,1,page_size-1,fh))>0 )
    {
        if ( nread<page_size-1 && buf[nread-1]!='\n' )
            buf[nread++] = '\n';
        if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
    }
    fclose(fh);

    // Write the part of the current block that follows the old header
    if ( fp->block_length - skip_until > 0 )
    {
        // the write targets bgzf_out, so that is the handle whose error text is reported
        if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) 
            error("Error: %s\n",bgzf_out->error);
    }
    if (bgzf_flush(bgzf_out) < 0) 
        error("Error: %s\n",bgzf_out->error);

    // Copy the remaining compressed bytes straight from the input to the output file
    while (1)
    {
#ifdef _USE_KNETFILE
        nread = knet_read(fp->x.fpr, buf, page_size);
#else
        nread = fread(buf, 1, page_size, fp->file);
#endif
        if ( nread<=0 ) 
            break;

#ifdef _USE_KNETFILE
        int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
#else
        int count = fwrite(buf, 1, nread, bgzf_out->file);
#endif
        if (count != nread)
            error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
    }

    if (bgzf_close(bgzf_out) < 0) 
        error("Error: %s\n",bgzf_out->error);

    // release the input handle and the copy buffer
    bgzf_close(fp);
    free(buf);
   
    return 0;
}
Esempio n. 27
0
/*
 * Load a list of regions from a file opened with bgzf_open() into reg.
 *
 * Each line is "<sequence> <from> [<to>]"; lines starting with '#' are
 * skipped and a missing <to> defaults to <from>.  Lines of one sequence must
 * be contiguous and sorted by (from, to).
 *
 * Returns 1 on success and 0 on any error (file cannot be opened, a line
 * cannot be parsed, the file is not sorted, or no line was read).  The file
 * handle and the line buffer are released on every path; whatever was stored
 * in reg before an error is left in place.
 */
int init_regions(const char *fname, regions_t *reg)
{
    int bgzf_getline(BGZF *fp, int delim, kstring_t *str);

    BGZF *zfp = bgzf_open(fname, "r");
    if ( !zfp ) 
    {
        fprintf(stderr,"%s: %s\n",fname,strerror(errno));
        return 0;
    }

    int i, j, mseqs = 10, mpos = 0;
    int ok = 1;     // cleared on the first error so that cleanup runs once at the end
    reg->nseqs = 0;
    reg->pos   = (pos_t **)calloc(mseqs,sizeof(pos_t*));
    reg->npos  = (int*) calloc(mseqs,sizeof(int));
    reg->seq_names = (char **) calloc(mseqs,sizeof(char*));

    kstring_t str = {0,0,0};
    ssize_t nread;
    // NOTE(review): the loop also stops at the first empty line (nread==0) - confirm this is intended
    while ((nread = bgzf_getline(zfp, '\n', &str)) > 0) 
    {
        char *line = str.s;
        if ( line[0] == '#' ) continue;

        // The sequence name ends at the first whitespace character
        int k = 0;
        while ( k<nread && !isspace((unsigned char)line[k]) ) k++;
        if ( k>=nread ) 
        { 
            fprintf(stderr,"Could not parse the file: %s [%s]\n", fname,line); 
            ok = 0;
            break;
        }
        line[k] = 0;

        if ( reg->nseqs==0 || strcmp(line,reg->seq_names[reg->nseqs-1]) )
        {
            // New sequence
            reg->nseqs++;
            if ( reg->nseqs >= mseqs )
            {
                mseqs++;
                reg->pos  = (pos_t **) realloc(reg->pos,sizeof(pos_t*)*mseqs); reg->pos[mseqs-1] = NULL;
                reg->npos = (int *) realloc(reg->npos,sizeof(int)*mseqs); reg->npos[mseqs-1] = 0;
                reg->seq_names = (char**) realloc(reg->seq_names,sizeof(char*)*mseqs);
            }
            reg->seq_names[reg->nseqs-1] = strdup(line);
            mpos = 0;
        }

        // mpos is the capacity of the position array of the current (last) sequence
        int iseq = reg->nseqs-1;
        if ( reg->npos[iseq] >= mpos )
        {
            mpos += 100;
            reg->pos[iseq] = (pos_t*) realloc(reg->pos[iseq],sizeof(pos_t)*mpos);
        }
        int ipos = reg->npos[iseq];
        pos_t *pos = reg->pos[iseq];
        reg->npos[iseq]++;
        if ( (sscanf(line+k+1,"%d %d",&pos[ipos].from,&pos[ipos].to))!=2 ) 
        {
            if ( (sscanf(line+k+1,"%d",&pos[ipos].from))!=1 )
            {
                fprintf(stderr,"Could not parse the region [%s]\n",line+k+1);
                ok = 0;
                break;
            }
            pos[ipos].to = pos[ipos].from;
        }

        // Check that the file is sorted
        if ( ipos>0 && (pos[ipos].from < pos[ipos-1].from || (pos[ipos].from==pos[ipos-1].from && pos[ipos].to<pos[ipos-1].to)) )
        {
            fprintf(stderr,"The file is not sorted: %s\n", fname);
            ok = 0;
            break;
        }
    }

    // Check that chromosomes come in blocks
    for (i=0; ok && i<reg->nseqs; i++)
    {
        for (j=0; j<i; j++)
        {
            if ( !strcmp(reg->seq_names[i],reg->seq_names[j]) ) 
            {
                fprintf(stderr,"The file is not sorted: %s\n", fname);
                ok = 0;
                break;
            }
        }
    }

    // A line buffer that was never allocated is treated as failure
    if ( !str.m ) ok = 0;

    free(str.s);
    bgzf_close(zfp);
    return ok;
}
Esempio n. 28
0
// Program entry point: parses the command line, validates the arguments,
// opens the genotype-likelihood input (a BGZF file when built with _USE_BGZF,
// otherwise an uncompressed GLF file or stdin that is loaded into memory),
// runs the EM estimation, writes one value of output->indF per individual to
// the output file and releases the resources allocated here.
// Invalid input is reported through error().
int main (int argc, char **argv) {
    /////////////////////
    // Parse Arguments //
    /////////////////////
    params *pars = new params;
    init_pars(pars);
    parse_cmd_args(argc, argv, pars);
    if( pars->version ) {
        printf("ngsF v%s\nCompiled on %s @ %s", version, __DATE__, __TIME__);
#ifdef _USE_BGZF
        printf(" (BGZF library)\n");
#else
        printf(" (STD library)\n");
#endif

        exit(0);
    }
    if( pars->verbose >= 1 ) {
        printf("==> Input Arguments:\n");
        printf("\tglf file: %s\n\tinit_values: %s\n\tfreq_fixed: %s\n\tout file: %s\n\tn_ind: %d\n\tn_sites: %lu\n\tchunk_size: %lu\n\tfast_lkl: %s\n\tapprox_EM: %s\n\tcall_geno: %s\n\tmax_iters: %d\n\tmin_epsilon: %.10f\n\tn_threads: %d\n\tseed: %lu\n\tquick: %s\n\tversion: %s\n\tverbose: %d\n\n",
               pars->in_glf, pars->init_values, pars->freq_fixed ? "true":"false", pars->out_file, pars->n_ind, pars->n_sites, pars->max_chunk_size, pars->fast_lkl ? "true":"false", pars->approx_EM ? "true":"false", pars->call_geno ? "true":"false", pars->max_iters, pars->min_epsilon, pars->n_threads, pars->seed, pars->quick ? "true":"false", version, pars->verbose);
    }
    if( pars->verbose > 4 ) printf("==> Verbose values greater than 4 for debugging purpose only. Expect large amounts of info on screen\n");



    /////////////////////
    // Check Arguments //
    /////////////////////
    if(pars->in_glf == NULL)
        error(__FUNCTION__,"GL input file (-glf) missing!");
    else if( strcmp(pars->in_glf, "-") == 0 ) {
        // The freshly allocated buffer is uninitialised, so it must be filled
        // with strcpy; strcat would first search it for a terminating NUL.
        pars->in_glf_type = new char[6];
        strcpy(pars->in_glf_type, "STDIN");
    } else {
        // The file type is the extension, including the leading dot
        pars->in_glf_type = strrchr(pars->in_glf, '.');
        if(pars->in_glf_type == NULL)
            error(__FUNCTION__,"invalid file type!");
    }
    if(pars->out_file == NULL)
        error(__FUNCTION__,"output file (-out) missing!");
    if(pars->n_ind == 0)
        error(__FUNCTION__,"number of individuals (-n_ind) missing!");
    if(pars->n_sites == 0)
        error(__FUNCTION__,"number of sites (-n_sites) missing!");



    ///////////////////////
    // Check input files //
    ///////////////////////
    // Get file total size
    struct stat st;
    // Start from a zeroed structure so that a failed stat() yields size 0
    // instead of an indeterminate value.
    memset(&st, 0, sizeof(st));
    stat(pars->in_glf, &st);
    if( strcmp(pars->in_glf_type, "STDIN") != 0 ) {
        if( pars->n_sites == st.st_size/sizeof(double)/pars->n_ind/3 && strcmp(pars->in_glf_type, ".glf") == 0 ) {
            if(pars->verbose >= 1)
                printf("==> UNCOMP input file (\"%s\"): number of sites (%lu) match expected file size\n", pars->in_glf_type, pars->n_sites);
        } else if( strcmp(pars->in_glf_type, ".glf") != 0 ) {
            if( pars->verbose >= 1)
                printf("==> COMPRESSED input file (\"%s\"): number of sites (%lu) do NOT match expected file size\n", pars->in_glf_type, pars->n_sites);
        } else
            error(__FUNCTION__,"wrong number of sites or invalid/corrupt file!");
    }


    // Adjust max_chunk_size in case of fewer sites
    if(pars->max_chunk_size > pars->n_sites) {
        if( pars->verbose >= 1 ) printf("==> Fewer sites (%lu) than chunk_size (%lu). Reducing chunk size to match number of sites\n", pars->n_sites, pars->max_chunk_size);
        pars->max_chunk_size = pars->n_sites;
    }
    // Calculate total number of chunks
    pars->n_chunks = ceil( (double) pars->n_sites/ (double) pars->max_chunk_size );
    if( pars->verbose >= 1 ) printf("==> Analysis will be run in %ld chunk(s)\n", pars->n_chunks);
    // Alocate memory for the chunk index
    pars->chunks_voffset = new int64_t[pars->n_chunks];
    memset(pars->chunks_voffset, 0, pars->n_chunks*sizeof(int64_t));
    // Adjust thread number to chunks
    if(pars->n_chunks < pars->n_threads) {
        if( pars->verbose >= 1 ) printf("==> Fewer chunks (%ld) than threads (%d). Reducing the number of threads to match number of chunks\n", pars->n_chunks, pars->n_threads);
        pars->n_threads = pars->n_chunks;
    }


    // Open input file
#ifdef _USE_BGZF
    if( pars->verbose >= 1 ) printf("==> Using BGZF I/O library\n");
    // Open BGZIP file
    if( strcmp(pars->in_glf_type, ".bgz") == 0 ) {
        // bgzf_open() returns a pointer: failure has to be detected with a
        // NULL test, an ordering comparison against 0 never reports it.
        if( (pars->in_glf_fh = bgzf_open(pars->in_glf, "rb")) == NULL )
            error(__FUNCTION__,"Cannot open BGZIP file!");
    } else
        error(__FUNCTION__,"BGZF library only supports BGZIP files!");

    bgzf_set_cache_size(pars->in_glf_fh, CACHE_SIZE * 1024uL * 1024uL * 1024uL);
#else

    if( pars->verbose >= 1 ) printf("==> Using native I/O library\n");
    // Open GLF file
    if( strcmp(pars->in_glf_type, "STDIN") == 0 )
        pars->in_glf_fh = stdin;
    else if( strcmp(pars->in_glf_type, ".glf") == 0 ) {
        if( (pars->in_glf_fh = fopen(pars->in_glf, "rb")) == NULL )
            error(__FUNCTION__,"Cannot open GLF file!");
    } else
        error(__FUNCTION__,"Standard library only supports UNCOMPRESSED GLF files!");

    // Allocate memory and read from the file
    pars->data = new double* [pars->n_sites];
    for(uint64_t s = 0; s < pars->n_sites; s++) {
        pars->data[s] = new double[pars->n_ind * 3];
        if( fread (pars->data[s], sizeof(double), pars->n_ind * 3, pars->in_glf_fh) != pars->n_ind * 3)
            error(__FUNCTION__,"cannot read GLF file!");
        if(pars->call_geno)
            call_geno(pars->data[s], pars->n_ind, 3);
    }
#endif
    if( pars->in_glf_fh == NULL )
        error(__FUNCTION__,"cannot open GLF file!");



    ///////////////////////////////////
    // Declare variables for results //
    ///////////////////////////////////
    out_data *output = new out_data;
    output->site_freq = new double[pars->n_sites];
    output->site_freq_num = new double[pars->n_sites];
    output->site_freq_den = new double[pars->n_sites];
    output->site_prob_var = new double[pars->n_sites];
    output->site_tmpprob_var = new double[pars->n_sites];
    output->indF = new double[pars->n_ind];
    output->indF_num = new double[pars->n_ind];
    output->indF_den = new double[pars->n_ind];
    output->ind_lkl = new double[pars->n_ind];
    // Initialize output
    init_output(pars, output);



    //////////////////
    // Analyze Data //
    //////////////////
    if( pars->verbose >= 1 && !pars->fast_lkl && strcmp("e", pars->init_values) != 0 ) {
        printf("==> Initial LogLkl: %.15f\n", full_HWE_like(pars, output->site_freq, output->indF, 0, pars->n_ind));
        fflush(stdout);
    }
    do_EM(pars, output);
    if( pars->verbose >= 1 ) printf("\nFinal logLkl: %f\n", output->global_lkl);



    //////////////////
    // Print Output //
    //////////////////
    FILE *out_file;
    if( pars->verbose >= 1 ) printf("Printing Output...\n");

    out_file = fopen(pars->out_file, "w");
    if(out_file == NULL)
        error(__FUNCTION__,"Cannot open OUTPUT file!");
    for(uint16_t i = 0; i < pars->n_ind; i++)
        fprintf(out_file,"%f\n", output->indF[i]);
    fclose(out_file);



    //////////////////////
    // Close Input File //
    //////////////////////
    if( pars->verbose >= 1 ) printf("Exiting...\n");
#ifdef _USE_BGZF
    bgzf_close(pars->in_glf_fh);
#else
    for(uint64_t s = 0; s < pars->n_sites; s++)
        delete [] pars->data[s];
    delete [] pars->data;
    fclose(pars->in_glf_fh);
#endif



    /////////////////
    // Free Memory //
    /////////////////
    delete [] output->site_freq;
    delete [] output->site_freq_num;
    delete [] output->site_freq_den;
    delete [] output->site_prob_var;
    delete [] output->site_tmpprob_var;
    delete [] output->indF;
    delete [] output->indF_num;
    delete [] output->indF_den;
    delete [] output->ind_lkl;
    delete output;
    //if( strcmp("e", pars->init_values) == 0 )
    //delete [] pars->init_values;
    delete [] pars->chunks_voffset;
    delete pars;

    return 0;
}
Esempio n. 29
0
/*
 * Copy every alignment of a BAM file that passes a set of filters into a
 * new BAM file. The input header is written unchanged to the output.
 *
 * An alignment is dropped when any of the following holds:
 *   - one of the selected flag bits (supplementary / secondary) is set
 *   - its mapping quality is below minMapQual
 *   - its NM tag (edit distance) is greater than maxMisMatches
 *   - its query length, derived from the CIGAR, is below minLen
 *   - (matches - mismatches) / matches is below minPcId
 *   - matches / query length is below minPcAln
 *
 * Errors are reported on stderr; the function has no return value.
 *
 * inBamFile                  path of the BAM file to read
 * outBamFile                 path of the BAM file to write
 * minMapQual                 minimum mapping quality to keep
 * minLen                     minimum CIGAR-derived query length to keep
 * maxMisMatches              maximum NM value to keep
 * minPcId                    minimum identity fraction (0 to 1) to keep
 * minPcAln                   minimum aligned fraction (0 to 1) to keep
 * ignoreSuppAlignments       non-zero: drop supplementary alignments
 * ignoreSecondaryAlignments  non-zero: drop secondary alignments
 */
void filterReads(char * inBamFile,
                 char * outBamFile,
                 int minMapQual,
                 int minLen,
                 int maxMisMatches,
                 float minPcId,
                 float minPcAln,
                 int ignoreSuppAlignments,
                 int ignoreSecondaryAlignments) {
    //
    int result = -1;
    int outResult = -1;

    // flag bits that cause an alignment to be dropped
    int supp_check = 0x0;
    if (ignoreSuppAlignments) {
        supp_check |= BAM_FSUPPLEMENTARY;
    }
    if (ignoreSecondaryAlignments) {
        supp_check |= BAM_FSECONDARY;
    }

    // helper variables
    BGZF* in = 0;
    BGZF* out = 0;
    bam1_t *b = bam_init1();
    bam_hdr_t *h = 0;   // released at the end on every path

    // open bam
    if ((in = bgzf_open(inBamFile, "r")) == 0) {
        fprintf(stderr,
               "ERROR: Failed to open \"%s\" for reading.\n",
               inBamFile);
    }
    else if ((h = bam_hdr_read(in)) == 0) { // read header
        fprintf(stderr,
                "ERROR: Failed to read BAM header of file \"%s\".\n",
                inBamFile);
    }
    else if ((out = bgzf_open(outBamFile, "w")) == 0) {
        fprintf(stderr,
               "ERROR: Failed to open \"%s\" for writing.\n",
               outBamFile);
    }
    else if (bam_hdr_write(out, h) < 0) { // write header
        fprintf(stderr,
                "ERROR: Failed to write BAM header to file \"%s\".\n",
                outBamFile);
    }
    else {
        int line = 0;
        int matches, mismatches, qLen;
        float pcAln, pcId;
        int showStats = 0;  // debug switch: set to 1 to log every rejection
        uint8_t *nmTag;

        // fetch alignments
        while ((result = bam_read1(in, b)) >= 0) {
            line += 1;

            // only primary mappings
            if ((b->core.flag & supp_check) != 0) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, non-primary\n", line);
                continue;
            }

            // only high quality
            if (b->core.qual < minMapQual) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, quality: %d\n", line, b->core.qual);
                continue;
            }

            // not too many absolute mismatches
            // The NM tag is optional; bam_aux_get() yields NULL when it is
            // absent and bam_aux2i() must not be handed a NULL pointer.
            // A record without NM is treated as having 0 mismatches.
            nmTag = bam_aux_get(b, "NM");
            mismatches = nmTag ? (int)bam_aux2i(nmTag) : 0;
            if (mismatches > maxMisMatches) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, mismatches: %d\n", line, mismatches);
                continue;
            }

            // not too short
            qLen = bam_cigar2qlen((&b->core)->n_cigar, bam_get_cigar(b));
            if (qLen < minLen) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, length: %d\n", line, qLen);
                continue;
            }

            // only high percent identity
            matches = bam_cigar2matches((&b->core)->n_cigar, bam_get_cigar(b));
            pcId = (matches - mismatches) / (float)matches; // percentage as float between 0 to 1
            if (pcId < minPcId) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, identity pc: %.4f\n", line, pcId);
                continue;
            }

            // only high percent alignment
            pcAln = matches / (float)qLen; // percentage as float between 0 to 1
            if (pcAln < minPcAln) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, alignment pc: %.4f\n", line, pcAln);
                continue;
            }

            // bam_write1() signals failure with a negative value (-1), so
            // test for < 0; stop at the first failure because the output
            // can no longer be completed.
            if ((outResult = bam_write1(out, b)) < 0) {
                fprintf(stderr,
                        "ERROR: Attempt to write read no. %d to file \"%s\" failed with code %d.\n",
                        line, outBamFile, outResult);
                break;
            }
        }
        // -1 from bam_read1() is the normal end of file; anything lower is an error
        if (result < -1) {
            fprintf(stderr,
                    "ERROR: retrieval of read no. %d from file \"%s\" failed with code %d.\n",
                    line, inBamFile, result);
        }
    }

    // clean up; the header is released here so it is not leaked when a
    // later step (opening or writing the output) fails
    if (h) bam_hdr_destroy(h);
    if (in) bgzf_close(in);
    // closing flushes the pending output, so a failure here means the
    // written file is incomplete
    if (out && bgzf_close(out) < 0) {
        fprintf(stderr,
                "ERROR: Failed to close file \"%s\" after writing.\n",
                outBamFile);
    }
    bam_destroy1(b);
}